diff --git a/.gitignore b/.gitignore index fecd7b4..9a71415 100755 --- a/.gitignore +++ b/.gitignore @@ -55,6 +55,26 @@ wing_source_*.tar.gz __pycache__/ *.pyc +# prediction/ Python 엔진 (로컬 실행 결과물) +prediction/**/__pycache__/ +prediction/**/*.pyc +# prediction/ opendrift 결과물 (로컬 실행 결과물) +prediction/opendrift/result/ +prediction/opendrift/logs/ +prediction/opendrift/uvicorn.pid +prediction/opendrift/.env +# prediction/ 이미지분석 결과물 (로컬 실행 결과물) +prediction/image/stitch/ +prediction/image/mx15hdi/Detect/Mask_result/ +prediction/image/mx15hdi/Detect/result/ +prediction/image/mx15hdi/Georeference/Mask_Tif/ +prediction/image/mx15hdi/Georeference/Tif/ +prediction/image/mx15hdi/Metadata/CSV/ +prediction/image/mx15hdi/Metadata/Image/Original_Images/ +prediction/image/mx15hdi/Polygon/Shp/ +# prediction/ 이미지분석 대용량 바이너리 (모델 가중치) +prediction/image/**/*.pth + # HNS manual images (large binary) frontend/public/hns-manual/pages/ frontend/public/hns-manual/images/ @@ -63,6 +83,7 @@ frontend/public/hns-manual/images/ !.claude/ .claude/settings.local.json .claude/CLAUDE.local.md +*.local # Team workflow (managed by /sync-team-workflow) .claude/rules/ diff --git a/backend/package-lock.json b/backend/package-lock.json index a3d5138..64418c7 100755 --- a/backend/package-lock.json +++ b/backend/package-lock.json @@ -8,6 +8,7 @@ "name": "backend", "version": "1.0.0", "dependencies": { + "@types/multer": "^2.1.0", "bcrypt": "^6.0.0", "cookie-parser": "^1.4.7", "cors": "^2.8.5", @@ -17,6 +18,7 @@ "google-auth-library": "^10.6.1", "helmet": "^8.1.0", "jsonwebtoken": "^9.0.3", + "multer": "^2.1.1", "pg": "^8.19.0" }, "devDependencies": { @@ -515,7 +517,6 @@ "version": "1.19.6", "resolved": "https://registry.npmjs.org/@types/body-parser/-/body-parser-1.19.6.tgz", "integrity": "sha512-HLFeCYgz89uk22N5Qg3dvGvsv46B8GLvKKo1zKG4NybA8U2DiEO3w9lqGg29t/tfLRJpJ6iQxnVw4OnB7MoM9g==", - "dev": true, "license": "MIT", "dependencies": { "@types/connect": "*", @@ -526,7 +527,6 @@ "version": "3.4.38", "resolved": "https://registry.npmjs.org/@types/connect/-/connect-3.4.38.tgz", "integrity": "sha512-K6uROf1LD88uDQqJCktA4yzL1YYAK6NgfsI0v/mTgyPKWsX1CnJ0XPSDhViejru1GcRkLWb8RlzFYJRqGUbaug==", - "dev": true, "license": "MIT", "dependencies": { "@types/node": "*" @@ -556,7 +556,6 @@ "version": "5.0.6", "resolved": "https://registry.npmjs.org/@types/express/-/express-5.0.6.tgz", "integrity": "sha512-sKYVuV7Sv9fbPIt/442koC7+IIwK5olP1KWeD88e/idgoJqDm3JV/YUiPwkoKK92ylff2MGxSz1CSjsXelx0YA==", - "dev": true, "license": "MIT", "dependencies": { "@types/body-parser": "*", @@ -568,7 +567,6 @@ "version": "5.1.1", "resolved": "https://registry.npmjs.org/@types/express-serve-static-core/-/express-serve-static-core-5.1.1.tgz", "integrity": "sha512-v4zIMr/cX7/d2BpAEX3KNKL/JrT1s43s96lLvvdTmza1oEvDudCqK9aF/djc/SWgy8Yh0h30TZx5VpzqFCxk5A==", - "dev": true, "license": "MIT", "dependencies": { "@types/node": "*", @@ -591,7 +589,6 @@ "version": "2.0.5", "resolved": "https://registry.npmjs.org/@types/http-errors/-/http-errors-2.0.5.tgz", "integrity": "sha512-r8Tayk8HJnX0FztbZN7oVqGccWgw98T/0neJphO91KkmOzug1KkofZURD4UaD5uH8AqcFLfdPErnBod0u71/qg==", - "dev": true, "license": "MIT" }, "node_modules/@types/jsonwebtoken": { @@ -612,11 +609,19 @@ "dev": true, "license": "MIT" }, + "node_modules/@types/multer": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/@types/multer/-/multer-2.1.0.tgz", + "integrity": "sha512-zYZb0+nJhOHtPpGDb3vqPjwpdeGlGC157VpkqNQL+UU2qwoacoQ7MpsAmUptI/0Oa127X32JzWDqQVEXp2RcIA==", + "license": "MIT", + "dependencies": { + 
"@types/express": "*" + } + }, "node_modules/@types/node": { "version": "22.19.11", "resolved": "https://registry.npmjs.org/@types/node/-/node-22.19.11.tgz", "integrity": "sha512-BH7YwL6rA93ReqeQS1c4bsPpcfOmJasG+Fkr6Y59q83f9M1WcBRHR2vM+P9eOisYRcN3ujQoiZY8uk5W+1WL8w==", - "dev": true, "license": "MIT", "dependencies": { "undici-types": "~6.21.0" @@ -638,21 +643,18 @@ "version": "6.14.0", "resolved": "https://registry.npmjs.org/@types/qs/-/qs-6.14.0.tgz", "integrity": "sha512-eOunJqu0K1923aExK6y8p6fsihYEn/BYuQ4g0CxAAgFc4b/ZLN4CrsRZ55srTdqoiLzU2B2evC+apEIxprEzkQ==", - "dev": true, "license": "MIT" }, "node_modules/@types/range-parser": { "version": "1.2.7", "resolved": "https://registry.npmjs.org/@types/range-parser/-/range-parser-1.2.7.tgz", "integrity": "sha512-hKormJbkJqzQGhziax5PItDUTMAM9uE2XXQmM37dyd4hVM+5aVl7oVxMVUiVQn2oCQFN/LKCZdvSM0pFRqbSmQ==", - "dev": true, "license": "MIT" }, "node_modules/@types/send": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/@types/send/-/send-1.2.1.tgz", "integrity": "sha512-arsCikDvlU99zl1g69TcAB3mzZPpxgw0UQnaHeC1Nwb015xp8bknZv5rIfri9xTOcMuaVgvabfIRA7PSZVuZIQ==", - "dev": true, "license": "MIT", "dependencies": { "@types/node": "*" @@ -662,7 +664,6 @@ "version": "2.2.0", "resolved": "https://registry.npmjs.org/@types/serve-static/-/serve-static-2.2.0.tgz", "integrity": "sha512-8mam4H1NHLtu7nmtalF7eyBH14QyOASmcxHhSfEoRyr0nP/YdoesEtU+uSRvMe96TW/HPTtkoKqQLl53N7UXMQ==", - "dev": true, "license": "MIT", "dependencies": { "@types/http-errors": "*", @@ -715,6 +716,12 @@ "url": "https://github.com/chalk/ansi-styles?sponsor=1" } }, + "node_modules/append-field": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/append-field/-/append-field-1.0.0.tgz", + "integrity": "sha512-klpgFSWLW1ZEs8svjfb7g4qWY0YS5imI82dTg+QahUvJ8YqAY0P10Uk8tTyh9ZGuYEZEMaeJYCF5BFuX552hsw==", + "license": "MIT" + }, "node_modules/array-flatten": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/array-flatten/-/array-flatten-1.1.1.tgz", @@ -836,6 +843,23 @@ "integrity": "sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==", "license": "BSD-3-Clause" }, + "node_modules/buffer-from": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz", + "integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==", + "license": "MIT" + }, + "node_modules/busboy": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/busboy/-/busboy-1.6.0.tgz", + "integrity": "sha512-8SFQbg/0hQ9xy3UNTB0YEnsNBbWfhf7RtnzpL7TkBiTBRfrQ9Fxcnz7VJsleJpyp6rVLvXiuORqjlHi5q+PYuA==", + "dependencies": { + "streamsearch": "^1.1.0" + }, + "engines": { + "node": ">=10.16.0" + } + }, "node_modules/bytes": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz", @@ -892,6 +916,21 @@ "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", "license": "MIT" }, + "node_modules/concat-stream": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/concat-stream/-/concat-stream-2.0.0.tgz", + "integrity": "sha512-MWufYdFw53ccGjCA+Ol7XJYpAlW6/prSMzuPOTRnJGcGzuhLn4Scrz7qf6o8bROZ514ltazcIFJZevcfbo0x7A==", + "engines": [ + "node >= 6.0" + ], + "license": "MIT", + "dependencies": { + "buffer-from": "^1.0.0", + "inherits": "^2.0.3", + "readable-stream": "^3.0.2", + "typedarray": "^0.0.6" + } + }, "node_modules/content-disposition": { "version": "0.5.4", "resolved": 
"https://registry.npmjs.org/content-disposition/-/content-disposition-0.5.4.tgz", @@ -1840,6 +1879,25 @@ "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", "license": "MIT" }, + "node_modules/multer": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/multer/-/multer-2.1.1.tgz", + "integrity": "sha512-mo+QTzKlx8R7E5ylSXxWzGoXoZbOsRMpyitcht8By2KHvMbf3tjwosZ/Mu/XYU6UuJ3VZnODIrak5ZrPiPyB6A==", + "license": "MIT", + "dependencies": { + "append-field": "^1.0.0", + "busboy": "^1.6.0", + "concat-stream": "^2.0.0", + "type-is": "^1.6.18" + }, + "engines": { + "node": ">= 10.16.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/negotiator": { "version": "0.6.3", "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.3.tgz", @@ -2178,6 +2236,20 @@ "node": ">=0.10.0" } }, + "node_modules/readable-stream": { + "version": "3.6.2", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", + "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", + "license": "MIT", + "dependencies": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + }, + "engines": { + "node": ">= 6" + } + }, "node_modules/resolve-pkg-maps": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz", @@ -2424,6 +2496,23 @@ "node": ">= 0.8" } }, + "node_modules/streamsearch": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/streamsearch/-/streamsearch-1.1.0.tgz", + "integrity": "sha512-Mcc5wHehp9aXz1ax6bZUyY5afg9u2rv5cqQI3mRrYkGC8rW2hM02jWuwjtL++LS5qinSyhj2QfLyNsuc+VsExg==", + "engines": { + "node": ">=10.0.0" + } + }, + "node_modules/string_decoder": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", + "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", + "license": "MIT", + "dependencies": { + "safe-buffer": "~5.2.0" + } + }, "node_modules/string-width": { "version": "5.1.2", "resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz", @@ -2562,6 +2651,12 @@ "node": ">= 0.6" } }, + "node_modules/typedarray": { + "version": "0.0.6", + "resolved": "https://registry.npmjs.org/typedarray/-/typedarray-0.0.6.tgz", + "integrity": "sha512-/aCDEGatGvZ2BIk+HmLf4ifCJFwvKFNb9/JeZPMulfgFracn9QFcAf5GO8B/mweUjSoblS5In0cWhqpfs/5PQA==", + "license": "MIT" + }, "node_modules/typescript": { "version": "5.9.3", "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", @@ -2580,7 +2675,6 @@ "version": "6.21.0", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==", - "dev": true, "license": "MIT" }, "node_modules/unpipe": { @@ -2592,6 +2686,12 @@ "node": ">= 0.8" } }, + "node_modules/util-deprecate": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", + "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", + "license": "MIT" + }, "node_modules/utils-merge": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/utils-merge/-/utils-merge-1.0.1.tgz", diff --git a/backend/package.json b/backend/package.json 
index 1e578cb..50ceb73 100755 --- a/backend/package.json +++ b/backend/package.json @@ -9,6 +9,7 @@ "db:seed": "tsx src/db/seed.ts" }, "dependencies": { + "@types/multer": "^2.1.0", "bcrypt": "^6.0.0", "cookie-parser": "^1.4.7", "cors": "^2.8.5", @@ -18,6 +19,7 @@ "google-auth-library": "^10.6.1", "helmet": "^8.1.0", "jsonwebtoken": "^9.0.3", + "multer": "^2.1.1", "pg": "^8.19.0" }, "devDependencies": { diff --git a/backend/src/aerial/aerialRouter.ts b/backend/src/aerial/aerialRouter.ts index a002e77..3adb1c5 100644 --- a/backend/src/aerial/aerialRouter.ts +++ b/backend/src/aerial/aerialRouter.ts @@ -1,7 +1,10 @@ import express from 'express'; +import multer from 'multer'; import { listMedia, createMedia, + getMediaBySn, + fetchOriginalImage, listCctv, listSatRequests, createSatRequest, @@ -9,11 +12,13 @@ import { isValidSatStatus, requestOilInference, checkInferenceHealth, + stitchImages, } from './aerialService.js'; import { isValidNumber } from '../middleware/security.js'; import { requireAuth, requirePermission } from '../auth/authMiddleware.js'; const router = express.Router(); +const stitchUpload = multer({ storage: multer.memoryStorage(), limits: { fileSize: 50 * 1024 * 1024 } }); // ============================================================ // AERIAL_MEDIA 라우트 @@ -63,6 +68,40 @@ router.post('/media', requireAuth, requirePermission('aerial', 'CREATE'), async } }); +// GET /api/aerial/media/:sn/download — 원본 이미지 다운로드 +router.get('/media/:sn/download', requireAuth, requirePermission('aerial', 'READ'), async (req, res) => { + try { + const sn = parseInt(req.params['sn'] as string, 10); + if (!isValidNumber(sn, 1, 999999)) { + res.status(400).json({ error: '유효하지 않은 미디어 번호' }); + return; + } + + const media = await getMediaBySn(sn); + if (!media) { + res.status(404).json({ error: '미디어를 찾을 수 없습니다.' }); + return; + } + + // fileId 추출: FILE_NM의 앞 36자가 UUID 형식인지 검증 (이미지 분석 생성 레코드만 다운로드 가능) + const fileId = media.fileNm.substring(0, 36); + const UUID_PATTERN = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i; + if (!UUID_PATTERN.test(fileId) || !media.equipNm) { + res.status(404).json({ error: '다운로드 가능한 이미지가 없습니다.' }); + return; + } + + const buffer = await fetchOriginalImage(media.equipNm, fileId); + const downloadName = media.orgnlNm ?? media.fileNm; + res.setHeader('Content-Type', 'image/jpeg'); + res.setHeader('Content-Disposition', `attachment; filename*=UTF-8''${encodeURIComponent(downloadName)}`); + res.send(buffer); + } catch (err) { + console.error('[aerial] 이미지 다운로드 오류:', err); + res.status(502).json({ error: '이미지 다운로드 실패' }); + } +}); + // ============================================================ // CCTV_CAMERA 라우트 // ============================================================ @@ -257,6 +296,39 @@ router.post('/oil-detect', express.json({ limit: '3mb' }), requireAuth, requireP } }); +// ============================================================ +// STITCH (이미지 합성) 라우트 +// ============================================================ + +// POST /api/aerial/stitch — 여러 이미지를 합성하여 JPEG 반환 +router.post( + '/stitch', + requireAuth, + requirePermission('aerial', 'READ'), + stitchUpload.array('files', 6), + async (req, res) => { + try { + const files = req.files as Express.Multer.File[]; + if (!files || files.length < 2) { + res.status(400).json({ error: '이미지를 최소 2장 이상 선택해주세요.' 
});
+        return;
+      }
+      const fileId = `stitch_${Date.now()}`;
+      const buffer = await stitchImages(files, fileId);
+      res.type('image/jpeg').send(buffer);
+    } catch (err) {
+      const message = err instanceof Error ? err.message : String(err);
+      if (message.includes('abort') || message.includes('timeout')) {
+        console.error('[aerial] 스티칭 서버 타임아웃:', message);
+        res.status(504).json({ error: '이미지 합성 서버 응답 시간 초과' });
+        return;
+      }
+      console.error('[aerial] 이미지 합성 오류:', err);
+      res.status(503).json({ error: '이미지 합성 서버 연결 불가' });
+    }
+  }
+);
+
 // GET /api/aerial/oil-detect/health — 추론 서버 상태 확인
 router.get('/oil-detect/health', requireAuth, async (_req, res) => {
   const health = await checkInferenceHealth();
diff --git a/backend/src/aerial/aerialService.ts b/backend/src/aerial/aerialService.ts
index aec19b9..23c6391 100644
--- a/backend/src/aerial/aerialService.ts
+++ b/backend/src/aerial/aerialService.ts
@@ -49,6 +49,26 @@ function rowToMedia(r: Record<string, unknown>): AerialMediaItem {
   };
 }
 
+export async function getMediaBySn(sn: number): Promise<AerialMediaItem | null> {
+  const { rows } = await wingPool.query(
+    `SELECT AERIAL_MEDIA_SN, ACDNT_SN, FILE_NM, ORGNL_NM, FILE_PATH,
+            LON, LAT, LOC_DC, EQUIP_TP_CD, EQUIP_NM, MEDIA_TP_CD,
+            TAKNG_DTM, FILE_SZ, RESOLUTION, REG_DTM
+       FROM wing.AERIAL_MEDIA WHERE AERIAL_MEDIA_SN = $1 AND USE_YN = 'Y'`,
+    [sn]
+  );
+  return rows.length > 0 ? rowToMedia(rows[0]) : null;
+}
+
+export async function fetchOriginalImage(camTy: string, fileId: string): Promise<Buffer> {
+  const res = await fetch(`${IMAGE_API_URL}/get-original-image/${camTy}/${fileId}`, {
+    signal: AbortSignal.timeout(30_000),
+  });
+  if (!res.ok) throw new Error(`이미지 서버 응답: ${res.status}`);
+  const base64 = await res.json() as string;
+  return Buffer.from(base64, 'base64');
+}
+
 export async function listMedia(input: ListMediaInput): Promise<AerialMediaItem[]> {
   const conditions: string[] = ["USE_YN = 'Y'"];
   const params: (string | number)[] = [];
@@ -109,8 +129,8 @@ export async function createMedia(input: {
       TAKNG_DTM, FILE_SZ, RESOLUTION
     ) VALUES (
       $1, $2, $3, $4,
-      $5, $6,
-      CASE WHEN $5 IS NOT NULL AND $6 IS NOT NULL THEN ST_SetSRID(ST_MakePoint($5::float, $6::float), 4326) END,
+      $5::float8, $6::float8,
+      CASE WHEN $5 IS NOT NULL AND $6 IS NOT NULL THEN ST_SetSRID(ST_MakePoint($5, $6), 4326) END,
       $7, $8, $9, $10, $11, $12, $13
     ) RETURNING AERIAL_MEDIA_SN`,
@@ -344,7 +364,7 @@ export async function updateSatRequestStatus(sn: number, sttsCd: string): Promis
 // OIL INFERENCE (GPU 서버 프록시)
 // ============================================================
 
-const OIL_INFERENCE_URL = process.env.OIL_INFERENCE_URL || 'http://localhost:8090';
+const IMAGE_API_URL = process.env.IMAGE_API_URL ?? 'http://localhost:5001';
 const INFERENCE_TIMEOUT_MS = 10_000;
 
 export interface OilInferenceRegion {
@@ -362,13 +382,34 @@ export interface OilInferenceResult {
   regions: OilInferenceRegion[];
 }
 
+/** 여러 이미지를 이미지 분석 서버의 /stitch 엔드포인트로 전송해 합성 JPEG를 반환한다.
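+ * Sends multipart/form-data with a single 'fileId' text field plus one 'files' part per
+ * image, waits up to 300 s, and forwards the stitch server's HTTP status and detail text
+ * when the request fails.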
+ */
+export async function stitchImages(
+  files: Express.Multer.File[],
+  fileId: string
+): Promise<Buffer> {
+  const form = new FormData();
+  form.append('fileId', fileId);
+  for (const f of files) {
+    form.append('files', new Blob([f.buffer], { type: f.mimetype }), f.originalname);
+  }
+  const response = await fetch(`${IMAGE_API_URL}/stitch`, {
+    method: 'POST',
+    body: form,
+    signal: AbortSignal.timeout(300_000),
+  });
+  if (!response.ok) {
+    const detail = await response.text().catch(() => '');
+    throw new Error(`stitch server responded ${response.status}: ${detail}`);
+  }
+  return Buffer.from(await response.arrayBuffer());
+}
+
 /** GPU 추론 서버에 이미지를 전송하고 세그멘테이션 결과를 반환한다. */
 export async function requestOilInference(imageBase64: string): Promise<OilInferenceResult> {
   const controller = new AbortController();
   const timeout = setTimeout(() => controller.abort(), INFERENCE_TIMEOUT_MS);
-
   try {
-    const response = await fetch(`${OIL_INFERENCE_URL}/inference`, {
+    const response = await fetch(`${IMAGE_API_URL}/inference`, {
       method: 'POST',
       headers: { 'Content-Type': 'application/json' },
       body: JSON.stringify({ image: imageBase64 }),
@@ -389,7 +430,7 @@ export async function checkInferenceHealth(): Promise {
   try {
-    const response = await fetch(`${OIL_INFERENCE_URL}/health`, {
+    const response = await fetch(`${IMAGE_API_URL}/health`, {
       signal: AbortSignal.timeout(3000),
     });
     if (!response.ok) throw new Error(`status ${response.status}`);
diff --git a/backend/src/prediction/imageAnalyzeService.ts b/backend/src/prediction/imageAnalyzeService.ts
new file mode 100644
index 0000000..41452e7
--- /dev/null
+++ b/backend/src/prediction/imageAnalyzeService.ts
@@ -0,0 +1,174 @@
+import crypto from 'crypto';
+import { wingPool } from '../db/wingDb.js';
+import { createMedia } from '../aerial/aerialService.js';
+
+const IMAGE_API_URL = process.env.IMAGE_API_URL ?? 'http://localhost:5001';
+
+// 유류 클래스 → UI 유종명 매핑
+const CLASS_ID_TO_OIL_TYPE: Record<string, string> = {
+  '검정': '벙커C유',
+  '갈색': '벙커C유',
+  '무지개': '경유',
+  '은색': '등유',
+};
+
+// 유종명 → DB 코드 매핑
+const OIL_DB_CODE_MAP: Record<string, string> = {
+  '벙커C유': 'BUNKER_C',
+  '원유': 'CRUDE_OIL',
+  '경유': 'DIESEL',
+  '등유': 'KEROSENE',
+};
+
+interface OilPolygon {
+  classId: string;
+  area: number;
+  volume: number;
+  note: string;
+  thickness: number;
+  wkt: string;
+}
+
+interface ImageServerResponse {
+  meta: string;
+  data: OilPolygon[];
+}
+
+export interface ImageAnalyzeResult {
+  acdntSn: number;
+  lat: number;
+  lon: number;
+  oilType: string;
+  area: number;
+  volume: number;
+  fileId: string;
+  occurredAt: string;
+}
+
+/**
+ * mx15hdi CSV 컬럼 순서:
+ * Filename, Tlat_d, Tlat_m, Tlat_s, Tlon_d, Tlon_m, Tlon_s,
+ * Alat_d, Alat_m, Alat_s, Alon_d, Alon_m, Alon_s,
+ * Az, El, Alt, Date1, Date2, Date3, Time1, Time2, Time3
+ */
+function parseMeta(metaStr: string): { lat: number; lon: number; occurredAt: string } {
+  const parts = metaStr.split(',');
+  const tlat_d = parseFloat(parts[1]);
+  const tlat_m = parseFloat(parts[2]);
+  const tlat_s = parseFloat(parts[3]);
+  const tlon_d = parseFloat(parts[4]);
+  const tlon_m = parseFloat(parts[5]);
+  const tlon_s = parseFloat(parts[6]);
+
+  const lat = tlat_d + tlat_m / 60 + tlat_s / 3600;
+  const lon = tlon_d + tlon_m / 60 + tlon_s / 3600;
+
+  // Date: Date1(DD), Date2(MM), Date3(YYYY) / Time: Time1(HH), Time2(mm), Time3(ss)
+  // (the Time3 seconds field is currently unused; occurredAt is fixed to :00 below)
+  const dd = (parts[16] ?? '01').padStart(2, '0');
+  const mm = (parts[17] ?? '01').padStart(2, '0');
+  const yyyy = parts[18] ?? new Date().getFullYear().toString();
+  const time1 = (parts[19] ?? 
'00').padStart(2, '0'); + const time2 = (parts[20] ?? '00').padStart(2, '0'); + const occurredAt = `${yyyy}-${mm}-${dd}T${time1}:${time2}:00+09:00`; + + return { lat, lon, occurredAt }; +} + +export async function analyzeImageFile(imageBuffer: Buffer, originalName: string): Promise { + const fileId = crypto.randomUUID(); + + // camTy는 현재 "mx15hdi"로 하드코딩한다. + // TODO: 추후 이미지 EXIF에서 카메라 모델명을 읽어 camTy를 자동 판별하는 로직을 + // 이미지 분석 서버(api.py)에 추가할 예정이다. (check_camera_info 함수 활용) + const camTy = 'mx15hdi'; + + // 이미지 분석 서버 호출 + const formData = new FormData(); + const blob = new Blob([imageBuffer]); + formData.append('camTy', camTy); + formData.append('fileId', fileId); + formData.append('image', blob, originalName); + + let serverResponse: ImageServerResponse; + try { + const res = await fetch(`${IMAGE_API_URL}/run-script/`, { + method: 'POST', + body: formData, + signal: AbortSignal.timeout(300_000), + }); + + if (!res.ok) { + const text = await res.text(); + if (res.status === 400 && text.includes('GPS')) { + throw Object.assign(new Error('GPS_NOT_FOUND'), { code: 'GPS_NOT_FOUND' }); + } + throw new Error(`이미지 분석 서버 오류: ${res.status} - ${text}`); + } + + serverResponse = await res.json() as ImageServerResponse; + } catch (err: unknown) { + if (err instanceof Error && (err as NodeJS.ErrnoException).code === 'GPS_NOT_FOUND') throw err; + if (err instanceof Error && err.name === 'TimeoutError') { + throw Object.assign(new Error('TIMEOUT'), { code: 'TIMEOUT' }); + } + throw err; + } + + // 응답 파싱 + const { lat, lon, occurredAt } = parseMeta(serverResponse.meta); + const firstOil = serverResponse.data[0]; + const oilType = firstOil ? (CLASS_ID_TO_OIL_TYPE[firstOil.classId] ?? '벙커C유') : '벙커C유'; + const area = firstOil?.area ?? 0; + const volume = firstOil?.volume ?? 0; + + // ACDNT INSERT + const acdntNm = `이미지분석_${new Date().toISOString().slice(0, 16).replace('T', ' ')}`; + const acdntRes = await wingPool.query( + `INSERT INTO wing.ACDNT + (ACDNT_CD, ACDNT_NM, ACDNT_TP_CD, OCCRN_DTM, LAT, LNG, ACDNT_STTS_CD, USE_YN, REG_DTM) + VALUES ( + 'INC-' || EXTRACT(YEAR FROM NOW())::TEXT || '-' || + LPAD( + (SELECT COALESCE(MAX(CAST(SPLIT_PART(ACDNT_CD, '-', 3) AS INTEGER)), 0) + 1 + FROM wing.ACDNT + WHERE ACDNT_CD LIKE 'INC-' || EXTRACT(YEAR FROM NOW())::TEXT || '-%')::TEXT, + 4, '0' + ), + $1, '유류유출', $2, $3, $4, 'ACTIVE', 'Y', NOW() + ) + RETURNING ACDNT_SN`, + [acdntNm, occurredAt, lat, lon] + ); + const acdntSn: number = acdntRes.rows[0].acdnt_sn; + + // SPIL_DATA INSERT (img_rslt_data에 분석 원본 저장) + await wingPool.query( + `INSERT INTO wing.SPIL_DATA + (ACDNT_SN, OIL_TP_CD, SPIL_QTY, SPIL_UNIT_CD, SPIL_TP_CD, FCST_HR, IMG_RSLT_DATA, REG_DTM) + VALUES ($1, $2, $3, 'KL', 'CONTINUOUS', 48, $4, NOW())`, + [ + acdntSn, + OIL_DB_CODE_MAP[oilType] ?? 
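+      // Oil types missing from the map fall back to the Bunker-C DB code.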
'BUNKER_C', + volume, + JSON.stringify(serverResponse), + ] + ); + + // AERIAL_MEDIA INSERT (영상사진관리 목록에서 조회 가능하도록 저장) + const fileSizeMb = (imageBuffer.length / (1024 * 1024)).toFixed(1) + ' MB'; + await createMedia({ + fileNm: `${fileId}_${originalName}`, + orgnlNm: originalName, + acdntSn, + lon, + lat, + locDc: `${lon.toFixed(4)} + ${lat.toFixed(4)}`, + equipTpCd: 'drone', + equipNm: camTy, + mediaTpCd: '사진', + takngDtm: occurredAt, + fileSz: fileSizeMb, + }); + + return { acdntSn, lat, lon, oilType, area, volume, fileId, occurredAt }; +} diff --git a/backend/src/prediction/predictionRouter.ts b/backend/src/prediction/predictionRouter.ts index c6802e4..d017a19 100644 --- a/backend/src/prediction/predictionRouter.ts +++ b/backend/src/prediction/predictionRouter.ts @@ -1,11 +1,15 @@ import express from 'express'; +import multer from 'multer'; import { listAnalyses, getAnalysisDetail, getBacktrack, listBacktracksByAcdnt, - createBacktrack, saveBoomLine, listBoomLines, + createBacktrack, saveBoomLine, listBoomLines, getAnalysisTrajectory, } from './predictionService.js'; +import { analyzeImageFile } from './imageAnalyzeService.js'; import { isValidNumber } from '../middleware/security.js'; import { requireAuth, requirePermission } from '../auth/authMiddleware.js'; +const upload = multer({ storage: multer.memoryStorage(), limits: { fileSize: 50 * 1024 * 1024 } }); + const router = express.Router(); // GET /api/prediction/analyses — 분석 목록 @@ -40,6 +44,26 @@ router.get('/analyses/:acdntSn', requireAuth, requirePermission('prediction', 'R } }); +// GET /api/prediction/analyses/:acdntSn/trajectory — 최신 OpenDrift 결과 조회 +router.get('/analyses/:acdntSn/trajectory', requireAuth, requirePermission('prediction', 'READ'), async (req, res) => { + try { + const acdntSn = parseInt(req.params.acdntSn as string, 10); + if (!isValidNumber(acdntSn, 1, 999999)) { + res.status(400).json({ error: '유효하지 않은 사고 번호' }); + return; + } + const result = await getAnalysisTrajectory(acdntSn); + if (!result) { + res.json({ trajectory: null, summary: null }); + return; + } + res.json(result); + } catch (err) { + console.error('[prediction] trajectory 조회 오류:', err); + res.status(500).json({ error: 'trajectory 조회 실패' }); + } +}); + // GET /api/prediction/backtrack — 사고별 역추적 목록 router.get('/backtrack', requireAuth, requirePermission('prediction', 'READ'), async (req, res) => { try { @@ -124,4 +148,36 @@ router.post('/boom', requireAuth, requirePermission('prediction', 'CREATE'), asy } }); +// POST /api/prediction/image-analyze — 이미지 업로드 분석 +router.post( + '/image-analyze', + requireAuth, + requirePermission('prediction', 'CREATE'), + upload.single('image'), + async (req, res) => { + try { + if (!req.file) { + res.status(400).json({ error: '이미지 파일이 필요합니다' }); + return; + } + const result = await analyzeImageFile(req.file.buffer, req.file.originalname); + res.json(result); + } catch (err: unknown) { + if (err instanceof Error) { + const code = (err as NodeJS.ErrnoException).code; + if (code === 'GPS_NOT_FOUND') { + res.status(422).json({ error: 'GPS_NOT_FOUND', message: 'GPS 정보가 없는 이미지입니다' }); + return; + } + if (code === 'TIMEOUT') { + res.status(504).json({ error: 'TIMEOUT', message: '이미지 분석 서버 응답 시간 초과' }); + return; + } + } + console.error('[prediction] 이미지 분석 오류:', err); + res.status(500).json({ error: '이미지 분석 실패' }); + } + } +); + export default router; diff --git a/backend/src/prediction/predictionService.ts b/backend/src/prediction/predictionService.ts index e46afd3..bdf86a5 100644 --- 
a/backend/src/prediction/predictionService.ts +++ b/backend/src/prediction/predictionService.ts @@ -404,6 +404,100 @@ export async function saveBoomLine(input: SaveBoomLineInput): Promise<{ boomLine return { boomLineSn: Number((rows[0] as Record)['boom_line_sn']) }; } +interface TrajectoryParticle { + lat: number; + lon: number; + stranded?: 0 | 1; +} + +interface TrajectoryWindPoint { + lat: number; + lon: number; + wind_speed: number; + wind_direction: number; +} + +interface TrajectoryHydrGrid { + lonInterval: number[]; + boundLonLat: { top: number; bottom: number; left: number; right: number }; + rows: number; + cols: number; + latInterval: number[]; +} + +interface TrajectoryTimeStep { + particles: TrajectoryParticle[]; + remaining_volume_m3: number; + weathered_volume_m3: number; + pollution_area_km2: number; + beached_volume_m3: number; + pollution_coast_length_m: number; + center_lat?: number; + center_lon?: number; + wind_data?: TrajectoryWindPoint[]; + hydr_data?: [number[][], number[][]]; + hydr_grid?: TrajectoryHydrGrid; +} + +interface TrajectoryResult { + trajectory: Array<{ lat: number; lon: number; time: number; particle: number; stranded?: 0 | 1 }>; + summary: { + remainingVolume: number; + weatheredVolume: number; + pollutionArea: number; + beachedVolume: number; + pollutionCoastLength: number; + }; + centerPoints: Array<{ lat: number; lon: number; time: number }>; + windData: TrajectoryWindPoint[][]; + hydrData: ({ value: [number[][], number[][]]; grid: TrajectoryHydrGrid } | null)[]; +} + +function transformTrajectoryResult(rawResult: TrajectoryTimeStep[]): TrajectoryResult { + const trajectory = rawResult.flatMap((step, stepIdx) => + step.particles.map((p, i) => ({ + lat: p.lat, + lon: p.lon, + time: stepIdx, + particle: i, + stranded: p.stranded, + })) + ); + const lastStep = rawResult[rawResult.length - 1]; + const summary = { + remainingVolume: lastStep.remaining_volume_m3, + weatheredVolume: lastStep.weathered_volume_m3, + pollutionArea: lastStep.pollution_area_km2, + beachedVolume: lastStep.beached_volume_m3, + pollutionCoastLength: lastStep.pollution_coast_length_m, + }; + const centerPoints = rawResult + .map((step, stepIdx) => + step.center_lat != null && step.center_lon != null + ? { lat: step.center_lat, lon: step.center_lon, time: stepIdx } + : null + ) + .filter((p): p is { lat: number; lon: number; time: number } => p !== null); + const windData = rawResult.map((step) => step.wind_data ?? []); + const hydrData = rawResult.map((step) => + step.hydr_data && step.hydr_grid + ? 
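+      // Keep a step's current-field data only when both the vectors and their grid are present.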
{ value: step.hydr_data, grid: step.hydr_grid } + : null + ); + return { trajectory, summary, centerPoints, windData, hydrData }; +} + +export async function getAnalysisTrajectory(acdntSn: number): Promise { + const sql = ` + SELECT RSLT_DATA FROM wing.PRED_EXEC + WHERE ACDNT_SN = $1 AND ALGO_CD = 'OPENDRIFT' AND EXEC_STTS_CD = 'COMPLETED' + ORDER BY CMPL_DTM DESC LIMIT 1 + `; + const { rows } = await wingPool.query(sql, [acdntSn]); + if (rows.length === 0 || !rows[0].rslt_data) return null; + return transformTrajectoryResult(rows[0].rslt_data as TrajectoryTimeStep[]); +} + export async function listBoomLines(acdntSn: number): Promise { const sql = ` SELECT BOOM_LINE_SN, ACDNT_SN, BOOM_NM, PRIORITY_ORD, diff --git a/backend/src/routes/simulation.ts b/backend/src/routes/simulation.ts index 98bcf84..aa46db2 100755 --- a/backend/src/routes/simulation.ts +++ b/backend/src/routes/simulation.ts @@ -1,227 +1,452 @@ import { Router, Request, Response } from 'express' +import { wingPool } from '../db/wingDb.js' +import { requireAuth } from '../auth/authMiddleware.js' import { isValidLatitude, isValidLongitude, isValidNumber, - isAllowedValue, isValidStringLength, - escapeHtml, } from '../middleware/security.js' const router = Router() -// 허용된 모델 목록 (화이트리스트) -const ALLOWED_MODELS = ['KOSPS', 'POSEIDON', 'OpenDrift', '앙상블'] as const -type AllowedModel = typeof ALLOWED_MODELS[number] +const PYTHON_API_URL = process.env.PYTHON_API_URL ?? 'http://localhost:5003' +const POLL_INTERVAL_MS = 3000 +const POLL_TIMEOUT_MS = 30 * 60 * 1000 // 30분 -// 허용된 유종 목록 -const ALLOWED_OIL_TYPES = ['원유', '벙커C유', '경유', '휘발유', '등유', '윤활유', '기타'] as const - -// 허용된 유출 유형 목록 -const ALLOWED_SPILL_TYPES = ['연속유출', '순간유출'] as const - -interface ParticlePoint { - lat: number - lon: number - time: number - particle: number +// 유종 매핑: 한국어 UI 선택값 → OpenDrift 유종 코드 +// 추후 DB/설정 파일로 외부화 예정 (개발 단계 임시 구현) +const OIL_TYPE_MAP: Record = { + '벙커C유': 'GENERIC BUNKER C', + '경유': 'GENERIC DIESEL', + '원유': 'WEST TEXAS INTERMEDIATE (WTI)', + '중유': 'GENERIC HEAVY FUEL OIL', + '등유': 'FUEL OIL NO.1 (KEROSENE)', + '휘발유': 'GENERIC GASOLINE', } -/** - * POST /api/simulation/run - * 오일 확산 시뮬레이션 실행 - * - * 보안 조치: - * - 화이트리스트 기반 모델명 검증 - * - 좌표 범위 검증 (위도 -90~90, 경도 -180~180) - * - 숫자 범위 검증 (duration, spill_amount) - * - 문자열 길이 제한 - */ -router.post('/run', async (req: Request, res: Response) => { - try { - const { model, lat, lon, duration_hours, oil_type, spill_amount, spill_type } = req.body +// 유종 매핑: 한국어 UI → DB 저장 코드 +const OIL_DB_CODE_MAP: Record = { + '벙커C유': 'BUNKER_C', + '경유': 'DIESEL', + '원유': 'CRUDE_OIL', + '중유': 'HEAVY_FUEL_OIL', + '등유': 'KEROSENE', + '휘발유': 'GASOLINE', +} - // 1. 필수 파라미터 존재 검증 - if (model === undefined || lat === undefined || lon === undefined || duration_hours === undefined) { +// 유출 형태 매핑: 한국어 UI → DB 저장 코드 +const SPIL_TYPE_MAP: Record = { + '연속': 'CONTINUOUS', + '비연속': 'DISCONTINUOUS', + '순간 유출': 'INSTANT', +} + +// 단위 매핑: 한국어 UI → DB 저장 코드 +const UNIT_MAP: Record = { + 'kL': 'KL', 'ton': 'TON', 'barrel': 'BBL', +} + +// ============================================================ +// POST /api/simulation/run +// 확산 시뮬레이션 실행 (OpenDrift) +// ============================================================ +/** + * OpenDrift 확산 시뮬레이션을 실행한다. + * Python FastAPI 서버에 작업을 제출하고 job_id를 받아 + * 백그라운드에서 폴링하며 결과를 DB에 저장한다. + * 프론트엔드는 execSn으로 GET /status/:execSn을 폴링하여 결과를 수신한다. 
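+ *
+ * Expected JSON body (matching the destructuring below): acdntSn or acdntNm,
+ * spillUnit, spillTypeCd, lat, lon, runTime (1~720 h), matTy, matVol, spillTime, startTime.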
+ */ +router.post('/run', requireAuth, async (req: Request, res: Response) => { + try { + const { acdntSn: rawAcdntSn, acdntNm, spillUnit, spillTypeCd, + lat, lon, runTime, matTy, matVol, spillTime, startTime } = req.body + + // 1. 필수 파라미터 검증 + if (lat === undefined || lon === undefined || runTime === undefined) { return res.status(400).json({ error: '필수 파라미터 누락', - required: ['model', 'lat', 'lon', 'duration_hours'] + required: ['lat', 'lon', 'runTime'], }) } - - // 2. 모델명 화이트리스트 검증 - if (!isAllowedValue(model, [...ALLOWED_MODELS])) { - return res.status(400).json({ - error: '유효하지 않은 모델', - message: `허용된 모델: ${ALLOWED_MODELS.join(', ')}`, - }) - } - - // 3. 위도/경도 범위 검증 if (!isValidLatitude(lat)) { - return res.status(400).json({ - error: '유효하지 않은 위도', - message: '위도는 -90 ~ 90 범위의 숫자여야 합니다.' - }) + return res.status(400).json({ error: '유효하지 않은 위도', message: '위도는 -90~90 범위여야 합니다.' }) } if (!isValidLongitude(lon)) { - return res.status(400).json({ - error: '유효하지 않은 경도', - message: '경도는 -180 ~ 180 범위의 숫자여야 합니다.' - }) + return res.status(400).json({ error: '유효하지 않은 경도', message: '경도는 -180~180 범위여야 합니다.' }) + } + if (!isValidNumber(runTime, 1, 720)) { + return res.status(400).json({ error: '유효하지 않은 예측 시간', message: '예측 시간은 1~720 범위여야 합니다.' }) + } + if (matVol !== undefined && !isValidNumber(matVol, 0, 1000000)) { + return res.status(400).json({ error: '유효하지 않은 유출량' }) + } + if (matTy !== undefined && (typeof matTy !== 'string' || !isValidStringLength(matTy, 50))) { + return res.status(400).json({ error: '유효하지 않은 유종' }) + } + // acdntSn 없는 경우 acdntNm 필수 + if (!rawAcdntSn && (!acdntNm || typeof acdntNm !== 'string' || !acdntNm.trim())) { + return res.status(400).json({ error: '사고를 선택하거나 사고명을 입력해야 합니다.' }) + } + if (acdntNm && (typeof acdntNm !== 'string' || !isValidStringLength(acdntNm, 200))) { + return res.status(400).json({ error: '사고명은 200자 이내여야 합니다.' }) } - // 4. 예측 시간 범위 검증 (1~720시간 = 최대 30일) - if (!isValidNumber(duration_hours, 1, 720)) { - return res.status(400).json({ - error: '유효하지 않은 예측 시간', - message: '예측 시간은 1~720 범위의 숫자여야 합니다.' - }) - } + // 1-B. acdntSn 미제공 시 ACDNT + SPIL_DATA 생성 + let resolvedAcdntSn: number | null = rawAcdntSn ? Number(rawAcdntSn) : null + let resolvedSpilDataSn: number | null = null - // 5. 선택적 파라미터 검증 - if (oil_type !== undefined) { - if (typeof oil_type !== 'string' || !isValidStringLength(oil_type, 50)) { - return res.status(400).json({ error: '유효하지 않은 유종' }) + if (!resolvedAcdntSn && acdntNm) { + try { + const occrn = startTime ?? new Date().toISOString() + const acdntRes = await wingPool.query( + `INSERT INTO wing.ACDNT + (ACDNT_CD, ACDNT_NM, ACDNT_TP_CD, OCCRN_DTM, LAT, LNG, ACDNT_STTS_CD, USE_YN, REG_DTM) + VALUES ( + 'INC-' || EXTRACT(YEAR FROM NOW())::TEXT || '-' || + LPAD( + (SELECT COALESCE(MAX(CAST(SPLIT_PART(ACDNT_CD, '-', 3) AS INTEGER)), 0) + 1 + FROM wing.ACDNT + WHERE ACDNT_CD LIKE 'INC-' || EXTRACT(YEAR FROM NOW())::TEXT || '-%')::TEXT, + 4, '0' + ), + $1, '유류유출', $2, $3, $4, 'ACTIVE', 'Y', NOW() + ) + RETURNING ACDNT_SN`, + [acdntNm.trim(), occrn, lat, lon] + ) + resolvedAcdntSn = acdntRes.rows[0].acdnt_sn as number + + const spilRes = await wingPool.query( + `INSERT INTO wing.SPIL_DATA (ACDNT_SN, OIL_TP_CD, SPIL_QTY, SPIL_UNIT_CD, SPIL_TP_CD, FCST_HR, REG_DTM) + VALUES ($1, $2, $3, $4, $5, $6, NOW()) + RETURNING SPIL_DATA_SN`, + [ + resolvedAcdntSn, + OIL_DB_CODE_MAP[matTy as string] ?? 'BUNKER_C', + matVol ?? 0, + UNIT_MAP[spillUnit as string] ?? 'KL', + SPIL_TYPE_MAP[spillTypeCd as string] ?? 
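+          // Unmapped spill-type labels default to CONTINUOUS.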
'CONTINUOUS', + runTime, + ] + ) + resolvedSpilDataSn = spilRes.rows[0].spil_data_sn as number + } catch (dbErr) { + console.error('[simulation] ACDNT/SPIL_DATA INSERT 실패:', dbErr) + return res.status(500).json({ error: '사고 정보 생성 실패' }) } } - if (spill_amount !== undefined) { - if (!isValidNumber(spill_amount, 0, 1000000)) { - return res.status(400).json({ - error: '유효하지 않은 유출량', - message: '유출량은 0~1,000,000 범위의 숫자여야 합니다.' + // 2. Python NC 파일 존재 여부 확인 + try { + const checkRes = await fetch(`${PYTHON_API_URL}/check-nc`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ lat, lon, startTime }), + signal: AbortSignal.timeout(5000), + }) + if (!checkRes.ok) { + return res.status(409).json({ + error: '해당 좌표의 해양 기상 데이터가 없습니다.', + message: 'NC 파일이 준비되지 않았습니다.', }) } + } catch { + // Python 서버 미기동 — 5번에서 처리 } - if (spill_type !== undefined) { - if (typeof spill_type !== 'string' || !isValidStringLength(spill_type, 50)) { - return res.status(400).json({ error: '유효하지 않은 유출 유형' }) + // 3. 기존 사고의 경우 SPIL_DATA_SN 조회 + if (resolvedAcdntSn && !resolvedSpilDataSn) { + try { + const spilRes = await wingPool.query( + `SELECT SPIL_DATA_SN FROM wing.SPIL_DATA WHERE ACDNT_SN = $1 ORDER BY SPIL_DATA_SN DESC LIMIT 1`, + [resolvedAcdntSn] + ) + if (spilRes.rows.length > 0) { + resolvedSpilDataSn = spilRes.rows[0].spil_data_sn as number + } + } catch (dbErr) { + console.error('[simulation] SPIL_DATA 조회 실패:', dbErr) } } - // 검증 완료 - 시뮬레이션 실행 - const trajectory = generateDemoTrajectory( - lat, - lon, - duration_hours, - model, - 20 + // 4. PRED_EXEC INSERT (PENDING) — ACDNT_SN 포함 (NOT NULL FK) + const execNm = `EXPC_${Date.now()}` + let predExecSn: number + try { + const insertRes = await wingPool.query( + `INSERT INTO wing.PRED_EXEC (ACDNT_SN, SPIL_DATA_SN, ALGO_CD, EXEC_STTS_CD, EXEC_NM, BGNG_DTM) + VALUES ($1, $2, 'OPENDRIFT', 'PENDING', $3, NOW()) + RETURNING PRED_EXEC_SN`, + [resolvedAcdntSn, resolvedSpilDataSn, execNm] + ) + predExecSn = insertRes.rows[0].pred_exec_sn as number + } catch (dbErr) { + console.error('[simulation] PRED_EXEC INSERT 실패:', dbErr) + return res.status(500).json({ error: '분석 기록 생성 실패' }) + } + + // matTy 변환: 한국어 유종 → OpenDrift 유종 코드 + // 매핑 대상이 아니면 원본 값 그대로 사용 (영문 직접 입력 대응) + const odMatTy = matTy !== undefined ? (OIL_TYPE_MAP[matTy as string] ?? (matTy as string)) : undefined + + // 5. Python /run-model 호출 + let jobId: string + try { + const pythonRes = await fetch(`${PYTHON_API_URL}/run-model`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + lat, + lon, + startTime, + runTime, + matTy: odMatTy, + matVol, + spillTime, + name: execNm, + }), + signal: AbortSignal.timeout(10000), + }) + + if (pythonRes.status === 503) { + const errData = await pythonRes.json() as { error?: string } + await wingPool.query( + `UPDATE wing.PRED_EXEC SET EXEC_STTS_CD='FAILED', ERR_MSG=$1, CMPL_DTM=NOW() WHERE PRED_EXEC_SN=$2`, + [errData.error || '분석 서버 포화', predExecSn] + ) + return res.status(503).json({ error: errData.error || '분석 서버가 사용 중입니다. 잠시 후 재시도해 주세요.' }) + } + + if (!pythonRes.ok) { + throw new Error(`Python 서버 응답 오류: ${pythonRes.status}`) + } + + const pythonData = await pythonRes.json() as { job_id: string } + jobId = pythonData.job_id + } catch { + await wingPool.query( + `UPDATE wing.PRED_EXEC SET EXEC_STTS_CD='FAILED', ERR_MSG='Python 분석 서버에 연결할 수 없습니다.', CMPL_DTM=NOW() WHERE PRED_EXEC_SN=$1`, + [predExecSn] + ) + return res.status(503).json({ error: 'Python 분석 서버에 연결할 수 없습니다.' }) + } + + // 6. 
RUNNING 업데이트 + await wingPool.query( + `UPDATE wing.PRED_EXEC SET EXEC_STTS_CD='RUNNING' WHERE PRED_EXEC_SN=$1`, + [predExecSn] ) - res.json({ - success: true, - model: escapeHtml(String(model)), - parameters: { - lat, - lon, - duration_hours, - oil_type: oil_type ? escapeHtml(String(oil_type)) : undefined, - spill_amount, - spill_type: spill_type ? escapeHtml(String(spill_type)) : undefined, - }, - trajectory, - metadata: { - particle_count: 20, - time_steps: duration_hours + 1, - generated_at: new Date().toISOString() - } - }) + // 7. 즉시 응답 (프론트엔드는 execSn으로 폴링, acdntSn은 신규 생성 사고 추적용) + res.json({ success: true, execSn: predExecSn, acdntSn: resolvedAcdntSn, status: 'RUNNING' }) + + // 8. 백그라운드 폴링 시작 + pollAndSave(jobId, predExecSn).catch((err: unknown) => + console.error('[simulation] pollAndSave 오류:', err) + ) } catch { - // 내부 오류 메시지 노출 방지 - res.status(500).json({ - error: '시뮬레이션 실행 실패', - message: '서버 내부 오류가 발생했습니다.' - }) + res.status(500).json({ error: '시뮬레이션 실행 실패', message: '서버 내부 오류가 발생했습니다.' }) } }) +// ============================================================ +// GET /api/simulation/status/:execSn +// 시뮬레이션 실행 상태 및 결과 조회 +// ============================================================ /** - * 데모 궤적 데이터 생성 + * PRED_EXEC 테이블에서 실행 상태를 조회한다. + * DB 상태(COMPLETED/FAILED)를 프론트 상태(DONE/ERROR)로 매핑하여 반환한다. */ -function generateDemoTrajectory( - startLat: number, - startLon: number, - hours: number, - model: string, - particleCount: number -): ParticlePoint[] { - const trajectory: ParticlePoint[] = [] - - const modelFactors: Record = { - 'KOSPS': 0.004, - 'POSEIDON': 0.006, - 'OpenDrift': 0.005, - '앙상블': 0.0055 +router.get('/status/:execSn', requireAuth, async (req: Request, res: Response) => { + const execSn = parseInt(req.params.execSn as string, 10) + if (isNaN(execSn) || execSn <= 0) { + return res.status(400).json({ error: '유효하지 않은 execSn' }) } - const spreadFactor = modelFactors[model] || 0.005 - const windSpeed = 5.5 - const windDirection = 135 - const currentSpeed = 0.55 - const currentDirection = 120 - const waveHeight = 2.2 + try { + const result = await wingPool.query( + `SELECT pe.EXEC_STTS_CD, pe.RSLT_DATA, pe.ERR_MSG, pe.BGNG_DTM, sd.FCST_HR, + ( + SELECT AVG(hist.REQD_SEC::FLOAT / hsd.FCST_HR) + FROM wing.PRED_EXEC hist + JOIN wing.SPIL_DATA hsd ON hist.SPIL_DATA_SN = hsd.SPIL_DATA_SN + WHERE hist.ALGO_CD = pe.ALGO_CD + AND hist.EXEC_STTS_CD = 'COMPLETED' + AND hist.REQD_SEC IS NOT NULL AND hist.REQD_SEC > 0 + AND hsd.FCST_HR IS NOT NULL AND hsd.FCST_HR > 0 + ) AS avg_sec_per_hr + FROM wing.PRED_EXEC pe + LEFT JOIN wing.SPIL_DATA sd ON pe.SPIL_DATA_SN = sd.SPIL_DATA_SN + WHERE pe.PRED_EXEC_SN=$1`, + [execSn] + ) + if (result.rows.length === 0) { + return res.status(404).json({ error: '분석 기록을 찾을 수 없습니다.' }) + } - const windRadians = (windDirection * Math.PI) / 180 - const currentRadians = (currentDirection * Math.PI) / 180 + const row = result.rows[0] + const dbStatus: string = row.exec_stts_cd as string + // DB 상태 → API 상태 매핑 + const statusMap: Record = { + PENDING: 'PENDING', + RUNNING: 'RUNNING', + COMPLETED: 'DONE', + FAILED: 'ERROR', + } + const status = statusMap[dbStatus] ?? 
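+    // Unknown DB codes are passed through unchanged so the client can still display them.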
dbStatus - const windWeight = 0.03 - const currentWeight = 0.07 + if (status === 'DONE' && row.rslt_data) { + const { trajectory, summary, centerPoints, windData, hydrData } = transformResult(row.rslt_data as PythonTimeStep[]) + return res.json({ status, trajectory, summary, centerPoints, windData, hydrData }) + } - const mainDriftLat = - Math.sin(windRadians) * windSpeed * windWeight + - Math.sin(currentRadians) * currentSpeed * currentWeight + if (status === 'ERROR') { + return res.json({ status, error: (row.err_msg as string) || '분석 중 오류가 발생했습니다.' }) + } - const mainDriftLon = - Math.cos(windRadians) * windSpeed * windWeight + - Math.cos(currentRadians) * currentSpeed * currentWeight + // PENDING/RUNNING: 경과 시간 기반 진행률 계산 + // 과거 실행의 초/예측시간 비율(avg_sec_per_hr) × 현재 fcst_hr로 추정, 이력 없으면 5초/hr 폴백 + let progress: number | undefined; + if (status === 'RUNNING' && row.bgng_dtm) { + const fcstHr = Number(row.fcst_hr) || 24; + const avgSecPerHr = row.avg_sec_per_hr ? Number(row.avg_sec_per_hr) : 5; + const estimatedSec = avgSecPerHr * fcstHr; + const elapsedSec = (Date.now() - new Date(row.bgng_dtm as string).getTime()) / 1000; + progress = Math.min(95, Math.floor((elapsedSec / estimatedSec) * 100)); + } - const dispersal = waveHeight * 0.001 + res.json({ status, ...(progress !== undefined && { progress }) }) + } catch { + res.status(500).json({ error: '상태 조회 실패' }) + } +}) - for (let p = 0; p < particleCount; p++) { - const initialSpread = 0.001 - const randomAngle = Math.random() * Math.PI * 2 - let particleLat = startLat + Math.sin(randomAngle) * initialSpread * Math.random() - let particleLon = startLon + Math.cos(randomAngle) * initialSpread * Math.random() +// ============================================================ +// 백그라운드 폴링 +// ============================================================ +async function pollAndSave(jobId: string, execSn: number): Promise { + const deadline = Date.now() + POLL_TIMEOUT_MS - for (let h = 0; h <= hours; h++) { - const mainMovementLat = mainDriftLat * h * 0.01 - const mainMovementLon = mainDriftLon * h * 0.01 + while (Date.now() < deadline) { + await new Promise(resolve => setTimeout(resolve, POLL_INTERVAL_MS)) - const turbulence = Math.sin(h * 0.3 + p * 0.5) * dispersal * h - const turbulenceAngle = (h * 0.2 + p * 0.7) * Math.PI - - trajectory.push({ - lat: particleLat + mainMovementLat + Math.sin(turbulenceAngle) * turbulence, - lon: particleLon + mainMovementLon + Math.cos(turbulenceAngle) * turbulence, - time: h, - particle: p + try { + const pollRes = await fetch(`${PYTHON_API_URL}/status/${jobId}`, { + signal: AbortSignal.timeout(5000), }) + if (!pollRes.ok) continue + + const data = await pollRes.json() as PythonStatusResponse + + if (data.status === 'DONE' && data.result) { + await wingPool.query( + `UPDATE wing.PRED_EXEC + SET EXEC_STTS_CD='COMPLETED', + RSLT_DATA=$1, + CMPL_DTM=NOW(), + REQD_SEC=EXTRACT(EPOCH FROM (NOW() - BGNG_DTM))::INTEGER + WHERE PRED_EXEC_SN=$2`, + [JSON.stringify(data.result), execSn] + ) + return + } + + if (data.status === 'ERROR') { + await wingPool.query( + `UPDATE wing.PRED_EXEC SET EXEC_STTS_CD='FAILED', ERR_MSG=$1, CMPL_DTM=NOW() WHERE PRED_EXEC_SN=$2`, + [data.error ?? 
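+          // Persist the Python-side message when present; GET /status/:execSn surfaces ERR_MSG to the client.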
'분석 오류', execSn] + ) + return + } + } catch { + // 개별 폴링 오류는 무시하고 재시도 } } - return trajectory + // 타임아웃 처리 + await wingPool.query( + `UPDATE wing.PRED_EXEC SET EXEC_STTS_CD='FAILED', ERR_MSG='분석 시간 초과 (30분)', CMPL_DTM=NOW() WHERE PRED_EXEC_SN=$1`, + [execSn] + ) } -/** - * GET /api/simulation/status/:jobId - * 시뮬레이션 작업 상태 확인 - */ -router.get('/status/:jobId', async (req: Request, res: Response) => { - const jobId = req.params.jobId as string +// ============================================================ +// 타입 및 결과 변환 +// ============================================================ +interface PythonParticle { + lat: number + lon: number + stranded?: 0 | 1 +} - // jobId 형식 검증 (영숫자, 하이픈만 허용) - if (!jobId || !/^[a-zA-Z0-9-]+$/.test(jobId) || jobId.length > 50) { - return res.status(400).json({ error: '유효하지 않은 작업 ID' }) +interface WindPoint { + lat: number + lon: number + wind_speed: number + wind_direction: number +} + +interface HydrGrid { + lonInterval: number[] + boundLonLat: { top: number; bottom: number; left: number; right: number } + rows: number + cols: number + latInterval: number[] +} + +interface PythonTimeStep { + particles: PythonParticle[] + remaining_volume_m3: number + weathered_volume_m3: number + pollution_area_km2: number + beached_volume_m3: number + pollution_coast_length_m: number + center_lat?: number + center_lon?: number + wind_data?: WindPoint[] + hydr_data?: [number[][], number[][]] + hydr_grid?: HydrGrid +} + +interface PythonStatusResponse { + status: 'RUNNING' | 'DONE' | 'ERROR' + result?: PythonTimeStep[] + error?: string +} + +function transformResult(rawResult: PythonTimeStep[]) { + const trajectory = rawResult.flatMap((step, stepIdx) => + step.particles.map((p, i) => ({ + lat: p.lat, + lon: p.lon, + time: stepIdx, + particle: i, + stranded: p.stranded, + })) + ) + const lastStep = rawResult[rawResult.length - 1] + const summary = { + remainingVolume: lastStep.remaining_volume_m3, + weatheredVolume: lastStep.weathered_volume_m3, + pollutionArea: lastStep.pollution_area_km2, + beachedVolume: lastStep.beached_volume_m3, + pollutionCoastLength: lastStep.pollution_coast_length_m, } - - res.json({ - jobId: escapeHtml(jobId), - status: 'completed', - progress: 100, - message: 'Simulation completed' - }) -}) + const centerPoints = rawResult + .map((step, stepIdx) => + step.center_lat != null && step.center_lon != null + ? { lat: step.center_lat, lon: step.center_lon, time: stepIdx } + : null + ) + .filter((p): p is { lat: number; lon: number; time: number } => p !== null) + const windData = rawResult.map((step) => step.wind_data ?? []) + const hydrData = rawResult.map((step) => + step.hydr_data && step.hydr_grid + ? 
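+      // Mirrors transformTrajectoryResult() in prediction/predictionService.ts; keep the two in sync.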
{ value: step.hydr_data, grid: step.hydr_grid } + : null + ) + return { trajectory, summary, centerPoints, windData, hydrData } +} export default router diff --git a/backend/src/server.ts b/backend/src/server.ts index 8a7c819..f0357e3 100755 --- a/backend/src/server.ts +++ b/backend/src/server.ts @@ -157,7 +157,8 @@ app.use('/api/audit', auditRouter) // API 라우트 — 업무 app.use('/api/board', boardRouter) app.use('/api/layers', layersRouter) -app.use('/api/simulation', simulationLimiter, simulationRouter) +app.use('/api/simulation/run', simulationLimiter) // 시뮬레이션 실행만 엄격 제한 (status 폴링 제외) +app.use('/api/simulation', simulationRouter) app.use('/api/hns', hnsRouter) app.use('/api/reports', reportsRouter) app.use('/api/assets', assetsRouter) diff --git a/database/init.sql b/database/init.sql index 5803553..a23122d 100755 --- a/database/init.sql +++ b/database/init.sql @@ -299,6 +299,7 @@ CREATE TABLE SPIL_DATA ( SPIL_LOC_GEOM GEOMETRY(Point, 4326), -- 유출위치지오메트리 FCST_HR INTEGER, -- 예측시간 REG_DTM TIMESTAMPTZ NOT NULL DEFAULT NOW(), -- 등록일시 + IMG_RSLT_DATA JSONB, -- 이미지 분석 결과 (2024-06 추가) CONSTRAINT PK_SPIL_DATA PRIMARY KEY (SPIL_DATA_SN), CONSTRAINT FK_SPIL_ACDNT FOREIGN KEY (ACDNT_SN) REFERENCES ACDNT(ACDNT_SN) ON DELETE CASCADE ); @@ -320,7 +321,8 @@ COMMENT ON COLUMN SPIL_DATA.REG_DTM IS '등록일시'; -- ============================================================ CREATE TABLE PRED_EXEC ( PRED_EXEC_SN SERIAL NOT NULL, -- 예측실행순번 - SPIL_DATA_SN INTEGER NOT NULL, -- 유출정보순번 + SPIL_DATA_SN INTEGER, -- 유출정보순번 (NULL 허용 — 사고 미연결 단독 실행 대응) + ACDNT_SN INTEGER NOT NULL, -- 사고순번 (사고 참조, 유출정보 미연결 시에도 사고는 필수) ALGO_CD VARCHAR(20) NOT NULL, -- 알고리즘코드 EXEC_STTS_CD VARCHAR(20) NOT NULL DEFAULT 'PENDING', -- 실행상태코드 BGNG_DTM TIMESTAMPTZ, -- 시작일시 @@ -328,6 +330,7 @@ CREATE TABLE PRED_EXEC ( REQD_SEC INTEGER, -- 소요시간초 RSLT_DATA JSONB, -- 결과데이터 ERR_MSG TEXT, -- 오류메시지 + EXEC_NM VARCHAR(100), -- 실행명 CONSTRAINT PK_PRED_EXEC PRIMARY KEY (PRED_EXEC_SN), CONSTRAINT FK_PRED_SPIL FOREIGN KEY (SPIL_DATA_SN) REFERENCES SPIL_DATA(SPIL_DATA_SN) ON DELETE CASCADE, CONSTRAINT CK_PRED_STTS CHECK (EXEC_STTS_CD IN ('PENDING','RUNNING','COMPLETED','FAILED')) @@ -335,14 +338,16 @@ CREATE TABLE PRED_EXEC ( COMMENT ON TABLE PRED_EXEC IS '예측실행'; COMMENT ON COLUMN PRED_EXEC.PRED_EXEC_SN IS '예측실행순번'; -COMMENT ON COLUMN PRED_EXEC.SPIL_DATA_SN IS '유출정보순번 (유출정보 참조)'; -COMMENT ON COLUMN PRED_EXEC.ALGO_CD IS '알고리즘코드 (ALGO: GNOME, OSCAR 등)'; +COMMENT ON COLUMN PRED_EXEC.SPIL_DATA_SN IS '유출정보순번 (FK → SPIL_DATA, NULL 허용)'; +COMMENT ON COLUMN PRED_EXEC.ACDNT_SN IS '사고순번 (사고 참조)'; +COMMENT ON COLUMN PRED_EXEC.ALGO_CD IS '알고리즘코드 (ALGO: GNOME, OSCAR, OPENDRIFT 등)'; COMMENT ON COLUMN PRED_EXEC.EXEC_STTS_CD IS '실행상태코드 (PENDING:대기, RUNNING:실행중, COMPLETED:완료, FAILED:실패)'; COMMENT ON COLUMN PRED_EXEC.BGNG_DTM IS '시작일시'; COMMENT ON COLUMN PRED_EXEC.CMPL_DTM IS '완료일시'; COMMENT ON COLUMN PRED_EXEC.REQD_SEC IS '소요시간초 (실행 소요 시간, 초 단위)'; COMMENT ON COLUMN PRED_EXEC.RSLT_DATA IS '결과데이터 (JSON 형식 예측 결과)'; COMMENT ON COLUMN PRED_EXEC.ERR_MSG IS '오류메시지'; +COMMENT ON COLUMN PRED_EXEC.EXEC_NM IS '실행명 (EXPC_{timestamp} 형식, OpenDrift 연동용)'; -- ============================================================ diff --git a/database/migration/009_incidents.sql b/database/migration/009_incidents.sql index 85a36de..166c595 100644 --- a/database/migration/009_incidents.sql +++ b/database/migration/009_incidents.sql @@ -45,6 +45,7 @@ CREATE TABLE IF NOT EXISTS SPIL_DATA ( SPIL_TP_CD VARCHAR(20), FCST_HR INTEGER, REG_DTM TIMESTAMPTZ NOT NULL DEFAULT NOW(), + IMG_RSLT_DATA JSONB, CONSTRAINT 
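+    -- Note: migration 009_incidents.sql creates SPIL_DATA with IF NOT EXISTS, so a database
+    -- that already has the table will not pick this column up from it; an explicit
+    -- ALTER TABLE SPIL_DATA ADD COLUMN IF NOT EXISTS IMG_RSLT_DATA JSONB; is needed there
+    -- (assuming no separate ALTER migration exists).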
PK_SPIL_DATA PRIMARY KEY (SPIL_DATA_SN), CONSTRAINT FK_SPIL_ACDNT FOREIGN KEY (ACDNT_SN) REFERENCES ACDNT(ACDNT_SN) ON DELETE CASCADE ); @@ -54,20 +55,23 @@ CREATE INDEX IF NOT EXISTS IDX_SPIL_ACDNT ON SPIL_DATA(ACDNT_SN); -- 3. 예측실행 (PRED_EXEC) CREATE TABLE IF NOT EXISTS PRED_EXEC ( PRED_EXEC_SN SERIAL NOT NULL, - ACDNT_SN INTEGER NOT NULL, + SPIL_DATA_SN INTEGER, + ACDNT_SN INTEGER NOT NULL, ALGO_CD VARCHAR(20) NOT NULL, EXEC_STTS_CD VARCHAR(20) NOT NULL DEFAULT 'PENDING', BGNG_DTM TIMESTAMPTZ, CMPL_DTM TIMESTAMPTZ, REQD_SEC INTEGER, RSLT_DATA JSONB, - ERR_MSG TEXT, + ERR_MSG TEXT, + EXEC_NM VARCHAR(100), CONSTRAINT PK_PRED_EXEC PRIMARY KEY (PRED_EXEC_SN), CONSTRAINT FK_PRED_ACDNT FOREIGN KEY (ACDNT_SN) REFERENCES ACDNT(ACDNT_SN) ON DELETE CASCADE, CONSTRAINT CK_PRED_STTS CHECK (EXEC_STTS_CD IN ('PENDING','RUNNING','COMPLETED','FAILED')) ); CREATE INDEX IF NOT EXISTS IDX_PRED_ACDNT ON PRED_EXEC(ACDNT_SN); +CREATE UNIQUE INDEX IF NOT EXISTS uix_pred_exec_nm ON PRED_EXEC (EXEC_NM) WHERE EXEC_NM IS NOT NULL; -- 4. 사고별 기상정보 스냅샷 (ACDNT_WEATHER) CREATE TABLE IF NOT EXISTS ACDNT_WEATHER ( diff --git a/docs/PREDICTION-GUIDE.md b/docs/PREDICTION-GUIDE.md new file mode 100644 index 0000000..9b90b98 --- /dev/null +++ b/docs/PREDICTION-GUIDE.md @@ -0,0 +1,191 @@ +# 확산 예측 기능 가이드 + +> 대상: 확산 예측(OpenDrift) 기능 개발 및 유지보수 담당자 + +--- + +## 1. 아키텍처 개요 + +**폴링 방식** — HTTP 연결 불안정 문제 해결을 위해 비동기 폴링 구조를 채택했다. + +``` +[프론트] 실행 버튼 + → POST /api/simulation/run 즉시 { execSn, status:'RUNNING' } 반환 + → "분석 중..." UI 표시 + → 3초마다 GET /api/simulation/status/:execSn 폴링 + +[Express 백엔드] + → PRED_EXEC INSERT (PENDING) + → POST Python /run-model 즉시 { job_id } 수신 + → 응답 즉시 반환 (프론트 블록 없음) + → 백그라운드: 3초마다 Python GET /status/:job_id 폴링 + → DONE 시 PRED_EXEC UPDATE (결과 JSONB 저장) + +[Python FastAPI :5003] + → 동시 처리 초과 시 503 즉시 반환 + → 여유 시 job_id 반환 + 백그라운드 OpenDrift 시뮬레이션 실행 + → NC 결과 → JSON 변환 → 상태 DONE +``` + +--- + +## 2. DB 스키마 (PRED_EXEC) + +```sql +PRED_EXEC_SN SERIAL PRIMARY KEY +ACDNT_SN INTEGER NOT NULL -- 사고 FK +SPIL_DATA_SN INTEGER -- 유출정보 FK (NULL 허용) +EXEC_NM VARCHAR(100) UNIQUE -- EXPC_{timestamp} 형식 +ALGO_CD VARCHAR(20) NOT NULL -- 'OPENDRIFT' +EXEC_STTS_CD VARCHAR(20) DEFAULT 'PENDING' + -- PENDING | RUNNING | COMPLETED | FAILED +BGNG_DTM TIMESTAMPTZ +CMPL_DTM TIMESTAMPTZ +REQD_SEC INTEGER +RSLT_DATA JSONB -- 시뮬레이션 결과 전체 +ERR_MSG TEXT +``` + +인덱스: `IDX_PRED_STTS` (EXEC_STTS_CD), `uix_pred_exec_nm` (EXEC_NM, partial) + +--- + +## 3. Python FastAPI 엔드포인트 (포트 5003) + +| 메서드 | 경로 | 설명 | +|--------|------|------| +| GET | `/get-received-date` | 최신 예보 수신 가능 날짜 | +| GET | `/get-uv/{datetime}/{category}` | 바람/해류 U/V 벡터 (`wind`\|`hydr`) | +| POST | `/check-nc` | NetCDF 파일 존재 여부 확인 | +| POST | `/run-model` | 시뮬레이션 제출 → 즉시 `job_id` 반환 | +| GET | `/status/{job_id}` | 시뮬레이션 진행 상태 조회 | + +### POST /run-model 입력 파라미터 + +```json +{ + "startTime": "2025-01-15 12:00:00", // KST (내부 UTC 변환) + "runTime": 72, // 예측 시간 (시간) + "matTy": "CRUDE OIL", // OpenDrift 유류명 + "matVol": 100.0, // 시간당 유출량 (m³/hr) + "lon": 126.1, + "lat": 36.6, + "spillTime": 12, // 유출 지속 시간 (0=순간) + "name": "EXPC_1710000000000" +} +``` + +### 유류 코드 매핑 (DB → OpenDrift) + +| DB SPIL_MAT_CD | OpenDrift 이름 | +|---------------|---------------| +| CRUD | CRUDE OIL | +| DSEL | DIESEL | +| BNKR | BUNKER | +| HEFO | IFO 180 | + +--- + +## 4. 
Express 백엔드 주요 엔드포인트 + +파일: [backend/src/routes/simulation.ts](../backend/src/routes/simulation.ts) + +| 메서드 | 경로 | 설명 | +|--------|------|------| +| POST | `/api/simulation/run` | 시뮬레이션 제출 → `execSn` 즉시 반환 | +| GET | `/api/simulation/status/:execSn` | 프론트 폴링용 상태 조회 | + +파일: [backend/src/prediction/predictionService.ts](../backend/src/prediction/predictionService.ts) + +- `fetchPredictionList()` — PRED_EXEC 목록 조회 +- `fetchTrajectoryResult()` — 저장된 결과 조회 (`RSLT_DATA` JSONB 파싱) + +--- + +## 5. 프론트엔드 주요 파일 + +| 파일 | 역할 | +|------|------| +| [frontend/src/tabs/prediction/components/OilSpillView.tsx](../frontend/src/tabs/prediction/components/OilSpillView.tsx) | 예측 탭 메인 뷰, 시뮬레이션 실행·폴링 상태 관리 | +| [frontend/src/tabs/prediction/hooks/](../frontend/src/tabs/prediction/hooks/) | `useSimulationStatus` 폴링 훅 | +| [frontend/src/tabs/prediction/services/predictionApi.ts](../frontend/src/tabs/prediction/services/predictionApi.ts) | API 요청 함수 + 타입 정의 | +| [frontend/src/tabs/prediction/components/RightPanel.tsx](../frontend/src/tabs/prediction/components/RightPanel.tsx) | 풍화량·잔류량·오염면적 표시 (마지막 스텝 실제 값) | +| [frontend/src/common/components/map/HydrParticleOverlay.tsx](../frontend/src/common/components/map/HydrParticleOverlay.tsx) | 해류 파티클 Canvas 오버레이 | + +### 핵심 타입 (predictionApi.ts) + +```typescript +interface HydrGrid { + lonInterval: number[]; + latInterval: number[]; + boundLonLat: { top: number; bottom: number; left: number; right: number }; + rows: number; cols: number; +} +interface HydrDataStep { + value: [number[][], number[][]]; // [u_2d, v_2d] + grid: HydrGrid; +} +``` + +### 폴링 훅 패턴 + +```typescript +useQuery({ + queryKey: ['simulationStatus', execSn], + queryFn: () => api.get(`/api/simulation/status/${execSn}`), + enabled: execSn !== null, + refetchInterval: (data) => + data?.status === 'DONE' || data?.status === 'ERROR' ? false : 3000, +}); +``` + +--- + +## 6. Python 코드 위치 (prediction/) + +``` +prediction/opendrift/ +├── api.py FastAPI 진입점 (수정 필요: 폴링 지원 + CORS) +├── config.py 경로 설정 (수정 필요: 환경변수화) +├── createJsonResult.py NC → JSON 변환 (핵심 후처리) +├── coastline/ TN_SHORLINE.shp (한국 해안선) +├── startup.sh / shutdown.sh +├── .env.example 환경변수 샘플 +└── environment-opendrift.yml conda 환경 재현용 +``` + +--- + +## 7. 환경변수 + +### backend/.env + +```bash +PYTHON_API_URL=http://localhost:5003 +``` + +### prediction/opendrift/.env + +```bash +MPR_STORAGE_ROOT=/data/storage # NetCDF 기상·해양 데이터 루트 +MPR_RESULT_ROOT=./result # 시뮬레이션 결과 저장 경로 +MAX_CONCURRENT_JOBS=4 # 동시 처리 최대 수 +``` + +--- + +## 8. 위험 요소 + +| 위험 | 내용 | +|------|------| +| NetCDF 파일 부재 | `MPR_STORAGE_ROOT` 경로에 KMA GDAPS·MOHID NC 파일 필요. 없으면 시뮬레이션 불가 | +| conda 환경 | `opendrift` conda 환경 설치 필요 (`environment-opendrift.yml`) | +| Workers 포화 | 동시 4개 초과 시 503 반환 → `MAX_CONCURRENT_JOBS` 조정 | +| 결과 용량 | 12시간 결과 ≈ 1500KB/건. 90일 주기 `RSLT_DATA = NULL` 정리 권장 | + +--- + +## 9. 
관련 문서 + +- [CRUD-API-GUIDE.md](./CRUD-API-GUIDE.md) — Express API 개발 패턴 +- [COMMON-GUIDE.md](./COMMON-GUIDE.md) — 인증·상태관리 공통 로직 diff --git a/docs/RELEASE-NOTES.md b/docs/RELEASE-NOTES.md index 5437f65..c032f90 100644 --- a/docs/RELEASE-NOTES.md +++ b/docs/RELEASE-NOTES.md @@ -4,6 +4,23 @@ ## [Unreleased] +### 추가 +- OpenDrift 유류 확산 시뮬레이션 통합 (비동기 폴링 구조) +- flyTo 완료 후 자동 재생 기능 +- 이미지 분석 서버 Docker 패키징 (CPU 전용 환경) +- SPIL_DATA 이미지 분석 결과 컬럼 인라인 통합 +- CPU 전용 Docker 환경 구축 (Dockerfile.cpu, docker-compose.cpu.yml) + +### 변경 +- 이미지 분석/보고서/항공 UI 개선 +- CCTV/관리자 고도화 + +### 기타 +- 팀 워크플로우 v1.6.1 적용일 갱신 +- 팀 워크플로우 v1.6.1 동기화 (custom_pre_commit 프로젝트 해시 불일치 해결) +- 팀 워크플로우 v1.6.0 동기화 (해시 기반 자동 최신화, push/mr/release 워크플로우 체크, 팀 관리 파일 gitignore 처리) +- 팀 워크플로우 v1.5.0 동기화 (스킬 7종 업데이트, version 스킬 신규, release-notes-guide 추가) + ## [2026-03-11] ### 추가 diff --git a/frontend/src/common/components/map/HydrParticleOverlay.tsx b/frontend/src/common/components/map/HydrParticleOverlay.tsx new file mode 100644 index 0000000..2ff9154 --- /dev/null +++ b/frontend/src/common/components/map/HydrParticleOverlay.tsx @@ -0,0 +1,157 @@ +import { useEffect, useRef } from 'react'; +import { useMap } from '@vis.gl/react-maplibre'; +import type { HydrDataStep } from '@tabs/prediction/services/predictionApi'; + +interface HydrParticleOverlayProps { + hydrStep: HydrDataStep | null; +} + +const PARTICLE_COUNT = 3000; +const MAX_AGE = 300; +const SPEED_SCALE = 0.1; +const DT = 600; +const TRAIL_LENGTH = 30; // 파티클당 저장할 화면 좌표 수 +const NUM_ALPHA_BANDS = 4; // stroke 배치 단위 + +interface TrailPoint { x: number; y: number; } +interface Particle { + lon: number; + lat: number; + trail: TrailPoint[]; + age: number; +} + +export default function HydrParticleOverlay({ hydrStep }: HydrParticleOverlayProps) { + const { current: map } = useMap(); + const animRef = useRef(); + + useEffect(() => { + if (!map || !hydrStep) return; + + const container = map.getContainer(); + const canvas = document.createElement('canvas'); + canvas.style.cssText = 'position:absolute;top:0;left:0;pointer-events:none;z-index:5;'; + canvas.width = container.clientWidth; + canvas.height = container.clientHeight; + container.appendChild(canvas); + const ctx = canvas.getContext('2d')!; + + const { value: [u2d, v2d], grid } = hydrStep; + const { boundLonLat, lonInterval, latInterval } = grid; + + const lons: number[] = [boundLonLat.left]; + for (const d of lonInterval) lons.push(lons[lons.length - 1] + d); + const lats: number[] = [boundLonLat.bottom]; + for (const d of latInterval) lats.push(lats[lats.length - 1] + d); + + function getUV(lon: number, lat: number): [number, number] { + let col = -1, row = -1; + for (let i = 0; i < lons.length - 1; i++) { + if (lon >= lons[i] && lon < lons[i + 1]) { col = i; break; } + } + for (let i = 0; i < lats.length - 1; i++) { + if (lat >= lats[i] && lat < lats[i + 1]) { row = i; break; } + } + if (col < 0 || row < 0) return [0, 0]; + const fx = (lon - lons[col]) / (lons[col + 1] - lons[col]); + const fy = (lat - lats[row]) / (lats[row + 1] - lats[row]); + const u00 = u2d[row]?.[col] ?? 0, u01 = u2d[row]?.[col + 1] ?? u00; + const u10 = u2d[row + 1]?.[col] ?? u00, u11 = u2d[row + 1]?.[col + 1] ?? u00; + const v00 = v2d[row]?.[col] ?? 0, v01 = v2d[row]?.[col + 1] ?? v00; + const v10 = v2d[row + 1]?.[col] ?? v00, v11 = v2d[row + 1]?.[col + 1] ?? 
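+      // The ?? fallbacks above pin any missing corner sample to the cell's
+      // base value; the blend below is plain bilinear interpolation weighted
+      // by the fractional offsets (fx, fy) inside the cell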
v00; + const u = u00 * (1 - fx) * (1 - fy) + u01 * fx * (1 - fy) + u10 * (1 - fx) * fy + u11 * fx * fy; + const v = v00 * (1 - fx) * (1 - fy) + v01 * fx * (1 - fy) + v10 * (1 - fx) * fy + v11 * fx * fy; + return [u, v]; + } + + const bbox = boundLonLat; + const particles: Particle[] = Array.from({ length: PARTICLE_COUNT }, () => ({ + lon: bbox.left + Math.random() * (bbox.right - bbox.left), + lat: bbox.bottom + Math.random() * (bbox.top - bbox.bottom), + trail: [], + age: Math.floor(Math.random() * MAX_AGE), + })); + + function resetParticle(p: Particle) { + p.lon = bbox.left + Math.random() * (bbox.right - bbox.left); + p.lat = bbox.bottom + Math.random() * (bbox.top - bbox.bottom); + p.trail = []; + p.age = 0; + } + + // 지도 이동/줌 시 화면 좌표가 틀어지므로 trail 초기화 + const onMove = () => { for (const p of particles) p.trail = []; }; + map.on('move', onMove); + + function animate() { + // 매 프레임 완전 초기화 → 잔상 없음 + ctx.clearRect(0, 0, canvas.width, canvas.height); + + // alpha band별 세그먼트 버퍼 (드로우 콜 최소화) + const bands: [number, number, number, number][][] = + Array.from({ length: NUM_ALPHA_BANDS }, () => []); + + for (const p of particles) { + const [u, v] = getUV(p.lon, p.lat); + const speed = Math.sqrt(u * u + v * v); + if (speed < 0.001) { resetParticle(p); continue; } + + const cosLat = Math.cos(p.lat * Math.PI / 180); + p.lon += u * SPEED_SCALE * DT / (cosLat * 111320); + p.lat += v * SPEED_SCALE * DT / 111320; + p.age++; + + if ( + p.lon < bbox.left || p.lon > bbox.right || + p.lat < bbox.bottom || p.lat > bbox.top || + p.age > MAX_AGE + ) { resetParticle(p); continue; } + + const curr = map.project([p.lon, p.lat]); + if (!curr) continue; + + p.trail.push({ x: curr.x, y: curr.y }); + if (p.trail.length > TRAIL_LENGTH) p.trail.shift(); + if (p.trail.length < 2) continue; + + for (let i = 1; i < p.trail.length; i++) { + const t = i / p.trail.length; // 0=oldest, 1=newest + const band = Math.min(NUM_ALPHA_BANDS - 1, Math.floor(t * NUM_ALPHA_BANDS)); + const a = p.trail[i - 1], b = p.trail[i]; + bands[band].push([a.x, a.y, b.x, b.y]); + } + } + + // alpha band별 일괄 렌더링 + ctx.lineWidth = 0.8; + for (let b = 0; b < NUM_ALPHA_BANDS; b++) { + ctx.strokeStyle = `rgba(180, 210, 255, ${((b + 1) / NUM_ALPHA_BANDS) * 0.75})`; + ctx.beginPath(); + for (const [x1, y1, x2, y2] of bands[b]) { + ctx.moveTo(x1, y1); + ctx.lineTo(x2, y2); + } + ctx.stroke(); + } + + animRef.current = requestAnimationFrame(animate); + } + + animRef.current = requestAnimationFrame(animate); + + const onResize = () => { + canvas.width = container.clientWidth; + canvas.height = container.clientHeight; + }; + map.on('resize', onResize); + + return () => { + cancelAnimationFrame(animRef.current!); + map.off('resize', onResize); + map.off('move', onMove); + canvas.remove(); + }; + }, [map, hydrStep]); + + return null; +} diff --git a/frontend/src/common/components/map/MapView.tsx b/frontend/src/common/components/map/MapView.tsx index 6e6314d..1e7b7e2 100755 --- a/frontend/src/common/components/map/MapView.tsx +++ b/frontend/src/common/components/map/MapView.tsx @@ -1,4 +1,4 @@ -import { useState, useMemo, useEffect, useCallback } from 'react' +import { useState, useMemo, useEffect, useCallback, useRef } from 'react' import { Map, Marker, Popup, Source, Layer, useControl, useMap } from '@vis.gl/react-maplibre' import { MapboxOverlay } from '@deck.gl/mapbox' import { ScatterplotLayer, PathLayer, TextLayer, BitmapLayer } from '@deck.gl/layers' @@ -8,6 +8,8 @@ import type { MapLayerMouseEvent } from 'maplibre-gl' import 
'maplibre-gl/dist/maplibre-gl.css' import { layerDatabase } from '@common/services/layerService' import type { PredictionModel, SensitiveResource } from '@tabs/prediction/components/OilSpillView' +import type { HydrDataStep } from '@tabs/prediction/services/predictionApi' +import HydrParticleOverlay from './HydrParticleOverlay' import type { BoomLine, BoomLineCoord } from '@common/types/boomLine' import type { ReplayShip, CollisionEvent } from '@common/types/backtrack' import { createBacktrackLayers } from './BacktrackReplayOverlay' @@ -17,8 +19,8 @@ import { useMapStore } from '@common/store/mapStore' const GEOSERVER_URL = import.meta.env.VITE_GEOSERVER_URL || 'http://localhost:8080' const VWORLD_API_KEY = import.meta.env.VITE_VWORLD_API_KEY || '' -// 남해안 중심 좌표 (여수 앞바다) -const DEFAULT_CENTER: [number, number] = [34.5, 127.8] +// 인천 송도 국제도시 +const DEFAULT_CENTER: [number, number] = [37.39, 126.64] const DEFAULT_ZOOM = 10 // CartoDB Dark Matter 스타일 @@ -159,7 +161,7 @@ interface MapViewProps { incidentCoord?: { lon: number; lat: number } isSelectingLocation?: boolean onMapClick?: (lon: number, lat: number) => void - oilTrajectory?: Array<{ lat: number; lon: number; time: number; particle?: number; model?: PredictionModel }> + oilTrajectory?: Array<{ lat: number; lon: number; time: number; particle?: number; model?: PredictionModel; stranded?: 0 | 1 }> selectedModels?: Set dispersionResult?: DispersionResult | null dispersionHeatmap?: Array<{ lon: number; lat: number; concentration: number }> @@ -177,7 +179,16 @@ interface MapViewProps { incidentCoord: { lat: number; lon: number } } sensitiveResources?: SensitiveResource[] + flyToTarget?: { lng: number; lat: number; zoom?: number } | null + fitBoundsTarget?: { north: number; south: number; east: number; west: number } | null + centerPoints?: Array<{ lat: number; lon: number; time: number }> + windData?: Array> + hydrData?: (HydrDataStep | null)[] + // 외부 플레이어 제어 (prediction 하단 바에서 제어할 때 사용) + externalCurrentTime?: number mapCaptureRef?: React.MutableRefObject<(() => string | null) | null> + onIncidentFlyEnd?: () => void + flyToIncident?: { lon: number; lat: number } } // deck.gl 오버레이 컴포넌트 (MapLibre 컨트롤로 등록, interleaved) @@ -188,6 +199,33 @@ function DeckGLOverlay({ layers }: { layers: any[] }) { return null } +// flyTo 트리거 컴포넌트 (Map 내부에서 useMap() 사용) +function FlyToController({ flyToTarget }: { flyToTarget?: { lng: number; lat: number; zoom?: number } | null }) { + const { current: map } = useMap() + useEffect(() => { + if (!map || !flyToTarget) return + map.flyTo({ + center: [flyToTarget.lng, flyToTarget.lat], + zoom: flyToTarget.zoom ?? 
10, + duration: 1200, + }) + }, [flyToTarget, map]) + return null +} + +// fitBounds 트리거 컴포넌트 (Map 내부에서 useMap() 사용) +function FitBoundsController({ fitBoundsTarget }: { fitBoundsTarget?: { north: number; south: number; east: number; west: number } | null }) { + const { current: map } = useMap() + useEffect(() => { + if (!map || !fitBoundsTarget) return + map.fitBounds( + [[fitBoundsTarget.west, fitBoundsTarget.south], [fitBoundsTarget.east, fitBoundsTarget.north]], + { padding: 80, duration: 1200, maxZoom: 12 } + ) + }, [fitBoundsTarget, map]) + return null +} + // 3D 모드 pitch/bearing 제어 컴포넌트 (Map 내부에서 useMap() 사용) function MapPitchController({ threeD }: { threeD: boolean }) { const { current: map } = useMap() @@ -203,14 +241,17 @@ function MapPitchController({ threeD }: { threeD: boolean }) { } // 사고 지점 변경 시 지도 이동 (Map 내부 컴포넌트) -function MapFlyToIncident({ lon, lat }: { lon?: number; lat?: number }) { +function MapFlyToIncident({ lon, lat, onFlyEnd }: { lon?: number; lat?: number; onFlyEnd?: () => void }) { const { current: map } = useMap() + const onFlyEndRef = useRef(onFlyEnd) + useEffect(() => { onFlyEndRef.current = onFlyEnd }, [onFlyEnd]) useEffect(() => { if (!map || lon == null || lat == null) return const doFly = () => { - map.flyTo({ center: [lon, lat], zoom: 12, duration: 1200 }) + map.flyTo({ center: [lon, lat], zoom: 11, duration: 1200 }) + map.once('moveend', () => onFlyEndRef.current?.()) } if (map.loaded()) { @@ -261,14 +302,24 @@ export function MapView({ layerBrightness = 50, backtrackReplay, sensitiveResources = [], + flyToTarget, + fitBoundsTarget, + centerPoints = [], + windData = [], + hydrData = [], + externalCurrentTime, mapCaptureRef, + onIncidentFlyEnd, + flyToIncident, }: MapViewProps) { const { mapToggles } = useMapStore() + const isControlled = externalCurrentTime !== undefined const [currentPosition, setCurrentPosition] = useState<[number, number]>(DEFAULT_CENTER) - const [currentTime, setCurrentTime] = useState(0) + const [internalCurrentTime, setInternalCurrentTime] = useState(0) const [isPlaying, setIsPlaying] = useState(false) const [playbackSpeed, setPlaybackSpeed] = useState(1) const [popupInfo, setPopupInfo] = useState(null) + const currentTime = isControlled ? externalCurrentTime : internalCurrentTime const handleMapClick = useCallback((e: MapLayerMouseEvent) => { const { lng, lat } = e.lngLat @@ -279,33 +330,34 @@ export function MapView({ setPopupInfo(null) }, [onMapClick]) - // 애니메이션 재생 로직 + // 애니메이션 재생 로직 (외부 제어 모드에서는 비활성) useEffect(() => { - if (!isPlaying || oilTrajectory.length === 0) return + if (isControlled || !isPlaying || oilTrajectory.length === 0) return const maxTime = Math.max(...oilTrajectory.map(p => p.time)) - if (currentTime >= maxTime) { + if (internalCurrentTime >= maxTime) { setIsPlaying(false) return } const interval = setInterval(() => { - setCurrentTime(prev => { + setInternalCurrentTime(prev => { const next = prev + (1 * playbackSpeed) return next > maxTime ? 
maxTime : next }) }, 200) return () => clearInterval(interval) - }, [isPlaying, currentTime, playbackSpeed, oilTrajectory]) + }, [isControlled, isPlaying, internalCurrentTime, playbackSpeed, oilTrajectory]) - // 시뮬레이션 시작 시 자동으로 애니메이션 재생 + // 시뮬레이션 시작 시 자동으로 애니메이션 재생 (외부 제어 모드에서는 비활성) useEffect(() => { + if (isControlled) return if (oilTrajectory.length > 0) { - setCurrentTime(0) + setInternalCurrentTime(0) setIsPlaying(true) } - }, [oilTrajectory.length]) + }, [isControlled, oilTrajectory.length]) // WMS 레이어 목록 const wmsLayers = useMemo(() => { @@ -330,6 +382,9 @@ export function MapView({ // --- 유류 확산 입자 (ScatterplotLayer) --- const visibleParticles = oilTrajectory.filter(p => p.time <= currentTime) + const activeStep = visibleParticles.length > 0 + ? Math.max(...visibleParticles.map(p => p.time)) + : -1 if (visibleParticles.length > 0) { result.push( new ScatterplotLayer({ @@ -338,8 +393,15 @@ export function MapView({ getPosition: (d: (typeof visibleParticles)[0]) => [d.lon, d.lat], getRadius: 3, getFillColor: (d: (typeof visibleParticles)[0]) => { - const modelKey = d.model || Array.from(selectedModels)[0] || 'OpenDrift' - return hexToRgba(MODEL_COLORS[modelKey] || '#3b82f6', 180) + // 1순위: stranded 입자 → 빨간색 + if (d.stranded === 1) return [239, 68, 68, 220] as [number, number, number, number] + // 2순위: 현재 활성 스텝 → 모델 기본 색상 + if (d.time === activeStep) { + const modelKey = d.model || Array.from(selectedModels)[0] || 'OpenDrift' + return hexToRgba(MODEL_COLORS[modelKey] || '#3b82f6', 180) + } + // 3순위: 과거 스텝 → 회색 + 투명 + return [130, 130, 130, 70] as [number, number, number, number] }, radiusMinPixels: 2.5, radiusMaxPixels: 5, @@ -354,6 +416,7 @@ export function MapView({ content: (
{modelKey} 입자 #{(d.particle ?? 0) + 1}
+                  {d.stranded === 1 && <span> (육지 부착)</span>}
시간: +{d.time}h
@@ -364,7 +427,7 @@ export function MapView({ } }, updateTriggers: { - getFillColor: [selectedModels], + getFillColor: [selectedModels, currentTime], }, }) ) @@ -689,37 +752,73 @@ export function MapView({ ) } - // --- 해류 화살표 (TextLayer) --- - if (incidentCoord) { - const currentArrows: Array<{ lon: number; lat: number; bearing: number; speed: number }> = [] - const gridSize = 5 - const spacing = 0.04 // 약 4km 간격 - const mainBearing = 200 // SSW 방향 (도) + // --- 입자 중심점 이동 경로 (PathLayer + ScatterplotLayer) --- + const visibleCenters = centerPoints.filter(p => p.time <= currentTime) + if (visibleCenters.length >= 2) { + result.push( + new PathLayer({ + id: 'center-path', + data: [{ path: visibleCenters.map(p => [p.lon, p.lat] as [number, number]) }], + getPath: (d: { path: [number, number][] }) => d.path, + getColor: [255, 220, 50, 200], + getWidth: 2, + widthMinPixels: 2, + widthMaxPixels: 4, + }) + ) + } + if (visibleCenters.length > 0) { + result.push( + new ScatterplotLayer({ + id: 'center-points', + data: visibleCenters, + getPosition: (d: (typeof visibleCenters)[0]) => [d.lon, d.lat], + getRadius: 5, + getFillColor: [255, 220, 50, 230], + radiusMinPixels: 4, + radiusMaxPixels: 8, + pickable: false, + }) + ) + } - for (let row = -gridSize; row <= gridSize; row++) { - for (let col = -gridSize; col <= gridSize; col++) { - const lat = incidentCoord.lat + row * spacing - const lon = incidentCoord.lon + col * spacing / Math.cos(incidentCoord.lat * Math.PI / 180) - // 사고 지점에서 멀어질수록 해류 방향 약간 변화 - const distFactor = Math.sqrt(row * row + col * col) / gridSize - const localBearing = mainBearing + (col * 3) + (row * 2) - const speed = 0.3 + (1 - distFactor) * 0.2 - currentArrows.push({ lon, lat, bearing: localBearing, speed }) - } - } + // --- 바람 화살표 (TextLayer) --- + if (incidentCoord && windData.length > 0) { + type ArrowPoint = { lon: number; lat: number; bearing: number; speed: number } + + const activeWindStep = windData[currentTime] ?? windData[0] ?? 
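+      // NOTE: windData is indexed with the current time value directly, which
+      // assumes one wind field per 1-hour step; missing steps fall back to no arrows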
[] + const currentArrows: ArrowPoint[] = activeWindStep + .filter((d) => d.wind_speed != null && d.wind_direction != null) + .map((d) => ({ + lon: d.lon, + lat: d.lat, + bearing: d.wind_direction, + speed: d.wind_speed, + })) result.push( new TextLayer({ id: 'current-arrows', data: currentArrows, - getPosition: (d: (typeof currentArrows)[0]) => [d.lon, d.lat], + getPosition: (d: ArrowPoint) => [d.lon, d.lat], getText: () => '➤', - getAngle: (d: (typeof currentArrows)[0]) => -d.bearing + 90, + getAngle: (d: ArrowPoint) => -d.bearing + 90, getSize: 22, - getColor: [6, 182, 212, 100], + getColor: (d: ArrowPoint): [number, number, number, number] => { + const s = d.speed + if (s < 3) return [6, 182, 212, 130] // cyan-500: calm + if (s < 7) return [34, 197, 94, 150] // green-500: light + if (s < 12) return [234, 179, 8, 170] // yellow-500: moderate + if (s < 17) return [249, 115, 22, 190] // orange-500: fresh + return [239, 68, 68, 210] // red-500: strong + }, characterSet: 'auto', sizeUnits: 'pixels' as const, billboard: true, + updateTriggers: { + getColor: [currentTime, windData], + getAngle: [currentTime, windData], + }, }) ) } @@ -729,7 +828,7 @@ export function MapView({ oilTrajectory, currentTime, selectedModels, boomLines, isDrawingBoom, drawingPoints, dispersionResult, dispersionHeatmap, incidentCoord, backtrackReplay, - sensitiveResources, + sensitiveResources, centerPoints, windData, ]) // 3D 모드에 따른 지도 스타일 전환 @@ -755,7 +854,11 @@ export function MapView({ {/* 3D 모드 pitch 제어 */} {/* 사고 지점 변경 시 지도 이동 */} - + + {/* 외부에서 flyTo 트리거 */} + + {/* 예측 완료 시 궤적 전체 범위로 fitBounds */} + {/* WMS 레이어 */} {wmsLayers.map(layer => ( @@ -783,6 +886,11 @@ export function MapView({ {/* deck.gl 오버레이 (인터리브드: 일반 레이어) */} + {/* 해류 파티클 오버레이 */} + {hydrData.length > 0 && ( + + )} + {/* 사고 위치 마커 (MapLibre Marker) */} {incidentCoord && !isNaN(incidentCoord.lat) && !isNaN(incidentCoord.lon) && !(dispersionHeatmap && dispersionHeatmap.length > 0) && ( @@ -832,14 +940,14 @@ export function MapView({ position={incidentCoord ? 
[incidentCoord.lat, incidentCoord.lon] : currentPosition} /> - {/* 타임라인 컨트롤 */} - {oilTrajectory.length > 0 && ( + {/* 타임라인 컨트롤 (외부 제어 모드에서는 숨김 — 하단 플레이어가 대신 담당) */} + {!isControlled && oilTrajectory.length > 0 && ( p.time))} isPlaying={isPlaying} playbackSpeed={playbackSpeed} - onTimeChange={setCurrentTime} + onTimeChange={setInternalCurrentTime} onPlayPause={() => setIsPlaying(!isPlaying)} onSpeedChange={setPlaybackSpeed} /> diff --git a/frontend/src/common/hooks/useSubMenu.ts b/frontend/src/common/hooks/useSubMenu.ts index 6b1baae..34e8ed6 100755 --- a/frontend/src/common/hooks/useSubMenu.ts +++ b/frontend/src/common/hooks/useSubMenu.ts @@ -175,4 +175,50 @@ export function consumeHnsReportPayload(): HnsReportPayload | null { return v; } +// ─── 유출유 예측 보고서 실 데이터 전달 ────────────────────────── +export interface OilReportPayload { + incident: { + name: string; + occurTime: string; + location: string; + lat: number | null; + lon: number | null; + pollutant: string; + spillAmount: string; + shipName: string; + }; + pollution: { + spillAmount: string; + weathered: string; + seaRemain: string; + pollutionArea: string; + coastAttach: string; + coastLength: string; + oilType: string; + }; + weather: { + windDir: string; + windSpeed: string; + waveHeight: string; + temp: string; + } | null; + spread: { + kosps: string; + openDrift: string; + poseidon: string; + }; + coastal: { + firstTime: string | null; + }; + hasSimulation: boolean; +} + +let _oilReportPayload: OilReportPayload | null = null; +export function setOilReportPayload(d: OilReportPayload | null) { _oilReportPayload = d; } +export function consumeOilReportPayload(): OilReportPayload | null { + const v = _oilReportPayload; + _oilReportPayload = null; + return v; +} + export { subMenuState } diff --git a/frontend/src/common/styles/components.css b/frontend/src/common/styles/components.css index 485bd29..213da42 100644 --- a/frontend/src/common/styles/components.css +++ b/frontend/src/common/styles/components.css @@ -259,6 +259,12 @@ background: rgba(6, 182, 212, 0.15); } + .prd-map-btn.active { + background: rgba(6, 182, 212, 0.25); + border-color: rgba(6, 182, 212, 0.6); + box-shadow: 0 0 0 1px rgba(6, 182, 212, 0.3); + } + /* ═══ Coordinate Display ═══ */ .cod { position: absolute; diff --git a/frontend/src/common/utils/imageAnalysisSignal.ts b/frontend/src/common/utils/imageAnalysisSignal.ts new file mode 100644 index 0000000..84569f4 --- /dev/null +++ b/frontend/src/common/utils/imageAnalysisSignal.ts @@ -0,0 +1,27 @@ +import type { ImageAnalyzeResult } from '@tabs/prediction/services/predictionApi'; + +/** + * 항공탐색(유출유면적분석) → 유출유 확산예측 탭 간 데이터 전달용 모듈 레벨 시그널. + * registerMainTabSwitcher / navigateToTab 패턴과 동일한 방식으로 구현된다. + */ + +interface PendingImageAnalysis extends ImageAnalyzeResult { + autoRun: boolean; +} + +let _pending: PendingImageAnalysis | null = null; + +/** 분석 결과를 시그널에 저장한다. navigateToTab 호출 직전에 사용한다. */ +export function setPendingImageAnalysis(data: PendingImageAnalysis): void { + _pending = data; +} + +/** + * 시그널에서 분석 결과를 꺼내고 초기화한다. + * OilSpillView 마운트 시 1회 호출한다. 
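+ *
+ * Usage sketch (`analyzeResult` stands in for the ImageAnalyzeResult at hand):
+ *   setPendingImageAnalysis({ ...analyzeResult, autoRun: true });
+ *   navigateToTab('prediction', 'analysis');
+ *   // later, in OilSpillView's mount effect — a second call returns null:
+ *   const pending = consumePendingImageAnalysis();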
+ */ +export function consumePendingImageAnalysis(): PendingImageAnalysis | null { + const value = _pending; + _pending = null; + return value; +} diff --git a/frontend/src/tabs/aerial/components/MediaManagement.tsx b/frontend/src/tabs/aerial/components/MediaManagement.tsx index e5d9940..90a41f9 100644 --- a/frontend/src/tabs/aerial/components/MediaManagement.tsx +++ b/frontend/src/tabs/aerial/components/MediaManagement.tsx @@ -1,6 +1,7 @@ import { useState, useCallback, useRef, useEffect } from 'react' -import { fetchAerialMedia } from '../services/aerialApi' +import { fetchAerialMedia, downloadAerialMedia } from '../services/aerialApi' import type { AerialMediaItem } from '../services/aerialApi' +import { navigateToTab } from '@common/hooks/useSubMenu' // ── Helpers ── @@ -48,6 +49,9 @@ export function MediaManagement() { const [searchTerm, setSearchTerm] = useState('') const [sortBy, setSortBy] = useState('latest') const [showUpload, setShowUpload] = useState(false) + const [downloadingId, setDownloadingId] = useState(null) + const [bulkDownloading, setBulkDownloading] = useState(false) + const [downloadResult, setDownloadResult] = useState<{ total: number; success: number } | null>(null) const modalRef = useRef(null) const loadData = useCallback(async () => { @@ -118,6 +122,38 @@ export function MediaManagement() { }) } + const handleBulkDownload = async () => { + if (bulkDownloading || selectedIds.size === 0) return + setBulkDownloading(true) + let success = 0 + const total = selectedIds.size + for (const sn of selectedIds) { + const item = mediaItems.find(f => f.aerialMediaSn === sn) + if (!item) continue + try { + await downloadAerialMedia(sn, item.orgnlNm ?? item.fileNm) + success++ + } catch { + // 실패 건 스킵 + } + } + setBulkDownloading(false) + setDownloadResult({ total, success }) + } + + const handleDownload = async (e: React.MouseEvent, item: AerialMediaItem) => { + e.stopPropagation() + if (downloadingId !== null) return + setDownloadingId(item.aerialMediaSn) + try { + await downloadAerialMedia(item.aerialMediaSn, item.orgnlNm ?? item.fileNm) + } catch { + alert('다운로드 실패: 이미지를 찾을 수 없습니다.') + } finally { + setDownloadingId(null) + } + } + const droneCount = mediaItems.filter(f => f.equipTpCd === 'drone').length const planeCount = mediaItems.filter(f => f.equipTpCd === 'plane').length const satCount = mediaItems.filter(f => f.equipTpCd === 'satellite').length @@ -254,8 +290,12 @@ export function MediaManagement() { {f.fileSz ?? '—'} {f.resolution ?? '—'} e.stopPropagation()}> - @@ -274,15 +314,47 @@ export function MediaManagement() { - -
+ {/* 선택 다운로드 결과 팝업 */} + {downloadResult && ( +
+
+
📥
+
다운로드 완료
+
+ 총 {downloadResult.total}건 선택 +
+
+ {downloadResult.success}건 다운로드 성공 + {downloadResult.total - downloadResult.success > 0 && ( + <> / {downloadResult.total - downloadResult.success}건 실패 + )} +
+ +
+
+ )} + {/* Upload Modal */} {showUpload && (
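`handleBulkDownload` above deliberately downloads the selected items one at a time, skipping failures so the summary popup can report a success count. If the sequential latency ever becomes a problem, the same `{ total, success }` accounting can be kept with settled promises — a sketch only, assuming `downloadAerialMedia` and `AerialMediaItem` stay as defined in `aerialApi.ts` later in this diff (`bulkDownloadParallel` itself is hypothetical):

```typescript
import { downloadAerialMedia } from '../services/aerialApi';
import type { AerialMediaItem } from '../services/aerialApi';

// Hypothetical parallel variant of handleBulkDownload — not part of this diff.
async function bulkDownloadParallel(
  selectedIds: Set<number>,
  mediaItems: AerialMediaItem[],
): Promise<{ total: number; success: number }> {
  // Resolve the selected serial numbers to media items, dropping unknown ids.
  const targets = [...selectedIds]
    .map(sn => mediaItems.find(f => f.aerialMediaSn === sn))
    .filter((f): f is AerialMediaItem => f !== undefined);

  // Fire all downloads at once; allSettled keeps per-item failures isolated.
  const settled = await Promise.allSettled(
    targets.map(f => downloadAerialMedia(f.aerialMediaSn, f.orgnlNm ?? f.fileNm)),
  );

  return {
    total: selectedIds.size,
    success: settled.filter(r => r.status === 'fulfilled').length,
  };
}
```

One reason to keep the committed sequential loop: browsers tend to throttle bursts of programmatic anchor-click downloads, so a parallel variant may silently drop files.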
diff --git a/frontend/src/tabs/aerial/components/OilAreaAnalysis.tsx b/frontend/src/tabs/aerial/components/OilAreaAnalysis.tsx index ea8333c..47c0bc2 100644 --- a/frontend/src/tabs/aerial/components/OilAreaAnalysis.tsx +++ b/frontend/src/tabs/aerial/components/OilAreaAnalysis.tsx @@ -1,212 +1,245 @@ -import { useState } from 'react' +import { useState, useRef, useEffect, useCallback } from 'react'; +import { stitchImages } from '../services/aerialApi'; +import { analyzeImage } from '@tabs/prediction/services/predictionApi'; +import { setPendingImageAnalysis } from '@common/utils/imageAnalysisSignal'; +import { navigateToTab } from '@common/hooks/useSubMenu'; -// ── Types & Mock Data ── - -interface MosaicImage { - id: string - filename: string - status: 'done' | 'processing' | 'waiting' - hasOil: boolean -} - -const mosaicImages: MosaicImage[] = [ - { id: 'T1', filename: '드론_001.jpg', status: 'done', hasOil: true }, - { id: 'T2', filename: '드론_002.jpg', status: 'done', hasOil: true }, - { id: 'T3', filename: '드론_003.jpg', status: 'done', hasOil: true }, - { id: 'T4', filename: '드론_004.jpg', status: 'done', hasOil: true }, - { id: 'T5', filename: '드론_005.jpg', status: 'processing', hasOil: false }, - { id: 'T6', filename: '드론_006.jpg', status: 'waiting', hasOil: false }, -] - -// ── Component ── +const MAX_IMAGES = 6; export function OilAreaAnalysis() { - const [activeStep, setActiveStep] = useState(1) - const [analyzing, setAnalyzing] = useState(false) - const [analyzed, setAnalyzed] = useState(false) + const [selectedFiles, setSelectedFiles] = useState([]); + const [previewUrls, setPreviewUrls] = useState([]); + const [stitchedBlob, setStitchedBlob] = useState(null); + const [stitchedPreviewUrl, setStitchedPreviewUrl] = useState(null); + const [isStitching, setIsStitching] = useState(false); + const [isAnalyzing, setIsAnalyzing] = useState(false); + const [error, setError] = useState(null); + const fileInputRef = useRef(null); - const handleAnalyze = () => { - setAnalyzing(true) - setTimeout(() => { - setAnalyzing(false) - setAnalyzed(true) - }, 1500) - } + // Object URL 메모리 누수 방지 — 언마운트 시 전체 revoke + useEffect(() => { + return () => { + previewUrls.forEach(url => URL.revokeObjectURL(url)); + if (stitchedPreviewUrl) URL.revokeObjectURL(stitchedPreviewUrl); + }; + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); - const stepCls = (idx: number) => { - if (idx < activeStep) return 'border-status-green text-status-green bg-[rgba(34,197,94,0.05)]' - if (idx === activeStep) return 'border-primary-cyan text-primary-cyan bg-[rgba(6,182,212,0.05)]' - return 'border-border text-text-3 bg-bg-3' - } + const handleFileSelect = useCallback((e: React.ChangeEvent) => { + setError(null); + const incoming = Array.from(e.target.files ?? 
[]); + if (incoming.length === 0) return; + + setSelectedFiles(prev => { + const merged = [...prev, ...incoming].slice(0, MAX_IMAGES); + if (prev.length + incoming.length > MAX_IMAGES) { + setError(`최대 ${MAX_IMAGES}장까지 선택할 수 있습니다.`); + } + return merged; + }); + + // setSelectedFiles updater 밖에서 독립 호출 — updater 내부 side effect는 + // React Strict Mode의 이중 호출로 인해 URL이 중복 생성되는 버그를 유발함 + setPreviewUrls(prev => { + const available = MAX_IMAGES - prev.length; + const toAdd = incoming.slice(0, available); + return [...prev, ...toAdd.map(f => URL.createObjectURL(f))]; + }); + + // input 초기화 (동일 파일 재선택 허용) + e.target.value = ''; + }, []); + + const handleRemoveFile = useCallback((idx: number) => { + setSelectedFiles(prev => prev.filter((_, i) => i !== idx)); + setPreviewUrls(prev => { + URL.revokeObjectURL(prev[idx]); + return prev.filter((_, i) => i !== idx); + }); + // 합성 결과 초기화 (선택 파일이 바뀌었으므로) + setStitchedBlob(null); + if (stitchedPreviewUrl) { + URL.revokeObjectURL(stitchedPreviewUrl); + setStitchedPreviewUrl(null); + } + setError(null); + }, [stitchedPreviewUrl]); + + const handleStitch = async () => { + if (selectedFiles.length < 2) { + setError('이미지를 2장 이상 선택해주세요.'); + return; + } + setError(null); + setIsStitching(true); + try { + const blob = await stitchImages(selectedFiles); + if (stitchedPreviewUrl) URL.revokeObjectURL(stitchedPreviewUrl); + setStitchedBlob(blob); + setStitchedPreviewUrl(URL.createObjectURL(blob)); + } catch (err) { + const msg = + err instanceof Error + ? err.message + : (err as { message?: string }).message ?? '이미지 합성에 실패했습니다.'; + const status = err instanceof Error ? 0 : (err as { status?: number }).status ?? 0; + setError(status === 504 ? '이미지 합성 서버 응답 시간이 초과되었습니다.' : msg); + } finally { + setIsStitching(false); + } + }; + + const handleAnalyze = async () => { + if (!stitchedBlob) return; + setError(null); + setIsAnalyzing(true); + try { + const stitchedFile = new File([stitchedBlob], `stitch_${Date.now()}.jpg`, { type: 'image/jpeg' }); + const result = await analyzeImage(stitchedFile); + setPendingImageAnalysis({ ...result, autoRun: true }); + navigateToTab('prediction', 'analysis'); + } catch (err) { + const msg = err instanceof Error ? err.message : '분석에 실패했습니다.'; + setError(msg.includes('GPS') ? '이미지에 GPS 정보가 없습니다. GPS 정보가 포함된 이미지를 사용해주세요.' : msg); + setIsAnalyzing(false); + } + }; + + const canStitch = selectedFiles.length >= 2 && !isStitching && !isAnalyzing; + const canAnalyze = stitchedBlob !== null && !isStitching && !isAnalyzing; return (
- {/* Left Panel */} -
+ {/* ── Left Panel ── */} +
🧩 유출유면적분석
-
단면 사진을 합성하여 유출유 확산 면적과 기름 양을 산정합니다.
- - {/* Step Indicator */} -
- {['① 사진 선택', '② 정합·합성', '③ 면적 산정'].map((label, i) => ( - - ))} +
+ 드론 사진을 합성하여 유출유 확산 면적과 기름 양을 산정합니다.
- {/* Selected Images */} -
선택된 사진 (6장)
-
- {['여수항_드론_001.jpg', '여수항_드론_002.jpg', '여수항_드론_003.jpg', '여수항_드론_004.jpg', '여수항_드론_005.jpg', '여수항_드론_006.jpg'].map((name, i) => ( -
- 🛸 - {name} - - {i < 4 ? '✓ 정합' : i === 4 ? '⏳ 정합중' : '대기'} - + {/* 이미지 선택 버튼 */} + + + + {/* 선택된 이미지 목록 */} + {selectedFiles.length > 0 && ( + <> +
선택된 이미지
+
+ {selectedFiles.map((file, i) => ( +
+ 📷 + {file.name} + +
+ ))}
- ))} -
+ + )} - {/* Analysis Parameters */} -
분석 파라미터
-
- {[ - ['촬영 고도', '120 m'], - ['GSD (지상해상도)', '3.2 cm/px'], - ['오버랩 비율', '80% / 70%'], - ['좌표계', 'EPSG:5186'], - ['유종 판별 기준', 'NDVI + NIR'], - ['유막 두께 추정', 'Bonn Agreement'], - ].map(([label, value], i) => ( -
- {label} - {value} -
- ))} -
+ {/* 에러 메시지 */} + {error && ( +
+ {error} +
+ )} - {/* Action Buttons */} + {/* 이미지 합성 버튼 */} + + + {/* 분석 시작 버튼 */} -
- {/* Right Panel */} + {/* ── Right Panel ── */}
- {/* Header */} -
- 🗺 합성 영상 및 유막 탐지 결과 -
- ■ 유막 탐지 - □ 원본 타일 - 정합률 96.2% -
-
- - {/* Image Grid 3×2 */} + {/* 3×2 이미지 그리드 */} +
선택된 이미지 미리보기
- {mosaicImages.map(img => ( -
-
- {img.hasOil && ( -
- )} -
{img.id}
-
- {img.status === 'done' && img.hasOil ? '유막' : img.status === 'processing' ? '정합중' : '대기'} + {Array.from({ length: MAX_IMAGES }).map((_, i) => ( +
+ {previewUrls[i] ? ( + {selectedFiles[i]?.name + ) : ( +
+ {i + 1}
-
-
- {img.filename} - - {img.status === 'done' ? '✓' : img.status === 'processing' ? '⏳' : '—'} - -
+ )}
))}
- {/* Merged Result Preview */} -
-
-
- 합성 영역 (3×2 그리드) + {/* 합성 결과 */} +
합성 결과
+
+ {stitchedPreviewUrl ? ( + 합성 결과 + ) : ( +
+ {isStitching + ? '⏳ 이미지를 합성하고 있습니다...' + : '이미지를 선택하고 합성 버튼을 클릭하면\n합성 결과가 여기에 표시됩니다.'}
-
-
-
-
34.7312°N, 127.6845°E
-
축척 ≈ 1:2,500
-
- - {/* Analysis Results */} -
-
📊 유출유 분석 결과
-
- {[ - { value: '0.42 km²', label: '유막 면적', color: 'text-status-red' }, - { value: '12.6 kL', label: '추정 유출량', color: 'text-status-orange' }, - { value: '1.84 km²', label: '합성 영역 면적', color: 'text-primary-cyan' }, - ].map((r, i) => ( -
-
{r.value}
-
{r.label}
-
- ))} -
-
- {[ - ['두꺼운 유막 (>1mm)', '0.08 km²', 'text-status-red'], - ['얇은 유막 (<1mm)', '0.34 km²', 'text-status-orange'], - ['무지개 빛깔', '0.12 km²', 'text-status-yellow'], - ['Bonn 코드', 'Code 3~4', 'text-text-1'], - ].map(([label, value, color], i) => ( -
- {label} - {value} -
- ))} -
+ )}
- ) + ); } diff --git a/frontend/src/tabs/aerial/services/aerialApi.ts b/frontend/src/tabs/aerial/services/aerialApi.ts index 90b8ed6..49af96d 100644 --- a/frontend/src/tabs/aerial/services/aerialApi.ts +++ b/frontend/src/tabs/aerial/services/aerialApi.ts @@ -103,3 +103,30 @@ export async function createSatRequest( const response = await api.post<{ satReqSn: number }>('/aerial/sat-requests', input); return response.data; } + +export async function downloadAerialMedia(sn: number, fileName: string): Promise { + const res = await api.get(`/aerial/media/${sn}/download`, { responseType: 'blob' }); + const url = URL.createObjectURL(res.data as Blob); + const a = document.createElement('a'); + a.href = url; + a.download = fileName; + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + URL.revokeObjectURL(url); +} + +/** + * 여러 이미지 파일을 /aerial/stitch 엔드포인트로 전송해 합성 JPEG Blob을 반환한다. + * FastAPI /stitch → pic_gps.py 스티칭 파이프라인 프록시. + */ +export async function stitchImages(files: File[]): Promise { + const form = new FormData(); + files.forEach(f => form.append('files', f)); + const response = await api.post('/aerial/stitch', form, { + responseType: 'blob', + timeout: 310_000, + headers: { 'Content-Type': undefined }, // 기본 application/json 제거 → 브라우저가 multipart/form-data 자동 설정 + }); + return response.data; +} diff --git a/frontend/src/tabs/prediction/components/LeftPanel.tsx b/frontend/src/tabs/prediction/components/LeftPanel.tsx index 4811dd9..40260b7 100755 --- a/frontend/src/tabs/prediction/components/LeftPanel.tsx +++ b/frontend/src/tabs/prediction/components/LeftPanel.tsx @@ -10,8 +10,11 @@ export function LeftPanel({ selectedAnalysis, enabledLayers, onToggleLayer, + accidentTime, + onAccidentTimeChange, incidentCoord, onCoordChange, + isSelectingLocation, onMapSelectClick, onRunSimulation, isRunningSimulation, @@ -25,6 +28,10 @@ export function LeftPanel({ onOilTypeChange, spillAmount, onSpillAmountChange, + incidentName, + onIncidentNameChange, + spillUnit, + onSpillUnitChange, boomLines, onBoomLinesChange, oilTrajectory, @@ -40,6 +47,7 @@ export function LeftPanel({ onLayerOpacityChange, layerBrightness, onLayerBrightnessChange, + onImageAnalysisResult, }: LeftPanelProps) { const [expandedSections, setExpandedSections] = useState({ predictionInput: true, @@ -64,8 +72,11 @@ export function LeftPanel({ toggleSection('predictionInput')} + accidentTime={accidentTime} + onAccidentTimeChange={onAccidentTimeChange} incidentCoord={incidentCoord} onCoordChange={onCoordChange} + isSelectingLocation={isSelectingLocation} onMapSelectClick={onMapSelectClick} onRunSimulation={onRunSimulation} isRunningSimulation={isRunningSimulation} @@ -79,6 +90,11 @@ export function LeftPanel({ onOilTypeChange={onOilTypeChange} spillAmount={spillAmount} onSpillAmountChange={onSpillAmountChange} + incidentName={incidentName} + onIncidentNameChange={onIncidentNameChange} + spillUnit={spillUnit} + onSpillUnitChange={onSpillUnitChange} + onImageAnalysisResult={onImageAnalysisResult} /> {/* Incident Section */} @@ -178,7 +194,7 @@ export function LeftPanel({ boomLines={boomLines} onBoomLinesChange={onBoomLinesChange} oilTrajectory={oilTrajectory} - incidentCoord={incidentCoord} + incidentCoord={incidentCoord ?? 
{ lat: 0, lon: 0 }} algorithmSettings={algorithmSettings} onAlgorithmSettingsChange={onAlgorithmSettingsChange} isDrawingBoom={isDrawingBoom} diff --git a/frontend/src/tabs/prediction/components/OilSpillView.tsx b/frontend/src/tabs/prediction/components/OilSpillView.tsx index 0cc91ea..10ed147 100755 --- a/frontend/src/tabs/prediction/components/OilSpillView.tsx +++ b/frontend/src/tabs/prediction/components/OilSpillView.tsx @@ -1,4 +1,4 @@ -import { useState, useEffect, useCallback } from 'react' +import { useState, useEffect, useCallback, useMemo, useRef } from 'react' import { LeftPanel } from './LeftPanel' import { RightPanel } from './RightPanel' import { MapView } from '@common/components/map/MapView' @@ -8,14 +8,17 @@ import { BoomDeploymentTheoryView } from './BoomDeploymentTheoryView' import { BacktrackModal } from './BacktrackModal' import { RecalcModal } from './RecalcModal' import { BacktrackReplayBar } from '@common/components/map/BacktrackReplayBar' -import { useSubMenu, navigateToTab, setReportGenCategory } from '@common/hooks/useSubMenu' +import { useSubMenu, navigateToTab, setReportGenCategory, setOilReportPayload, type OilReportPayload } from '@common/hooks/useSubMenu' import type { BoomLine, AlgorithmSettings, ContainmentResult, BoomLineCoord } from '@common/types/boomLine' import type { BacktrackPhase, BacktrackVessel, BacktrackConditions, ReplayShip, CollisionEvent } from '@common/types/backtrack' import { TOTAL_REPLAY_FRAMES } from '@common/types/backtrack' -import { fetchBacktrackByAcdnt, createBacktrack, fetchPredictionDetail } from '../services/predictionApi' -import type { PredictionDetail } from '../services/predictionApi' +import { fetchBacktrackByAcdnt, createBacktrack, fetchPredictionDetail, fetchAnalysisTrajectory } from '../services/predictionApi' +import type { CenterPoint, HydrDataStep, ImageAnalyzeResult, OilParticle, PredictionDetail, SimulationRunResponse, SimulationSummary, WindPoint } from '../services/predictionApi' +import { useSimulationStatus } from '../hooks/useSimulationStatus' +import SimulationLoadingOverlay from './SimulationLoadingOverlay' import { api } from '@common/services/api' import { generateAIBoomLines } from '@common/utils/geo' +import { consumePendingImageAnalysis } from '@common/utils/imageAnalysisSignal' export type PredictionModel = 'KOSPS' | 'POSEIDON' | 'OpenDrift' @@ -101,15 +104,24 @@ export const ALL_MODELS: PredictionModel[] = ['KOSPS', 'POSEIDON', 'OpenDrift'] export function OilSpillView() { const { activeSubTab, setActiveSubTab } = useSubMenu('prediction') const [enabledLayers, setEnabledLayers] = useState>(new Set()) - const [incidentCoord, setIncidentCoord] = useState({ lon: 127.6845, lat: 34.7312 }) + const [incidentCoord, setIncidentCoord] = useState<{ lon: number; lat: number } | null>(null) + const [flyToCoord, setFlyToCoord] = useState<{ lon: number; lat: number } | undefined>(undefined) + const flyToTarget = null + const fitBoundsTarget = null const [isSelectingLocation, setIsSelectingLocation] = useState(false) - const [oilTrajectory, setOilTrajectory] = useState>([]) + const [oilTrajectory, setOilTrajectory] = useState([]) + const [centerPoints, setCenterPoints] = useState([]) + const [windData, setWindData] = useState([]) + const [hydrData, setHydrData] = useState<(HydrDataStep | null)[]>([]) const [isRunningSimulation, setIsRunningSimulation] = useState(false) const [selectedModels, setSelectedModels] = useState>(new Set(['KOSPS'])) const [predictionTime, setPredictionTime] = useState(48) + const 
[accidentTime, setAccidentTime] = useState('') const [spillType, setSpillType] = useState('연속') const [oilType, setOilType] = useState('벙커C유') const [spillAmount, setSpillAmount] = useState(100) + const [incidentName, setIncidentName] = useState('') + const [spillUnit, setSpillUnit] = useState('kL') // 민감자원 const [sensitiveResources, setSensitiveResources] = useState([]) @@ -132,7 +144,7 @@ export function OilSpillView() { // 타임라인 플레이어 상태 const [isPlaying, setIsPlaying] = useState(false) - const [timelinePosition, setTimelinePosition] = useState(25) // 0~100% + const [currentStep, setCurrentStep] = useState(0) // 현재 시간값 (시간 단위) const [playSpeed, setPlaySpeed] = useState(1) // 역추적 상태 @@ -152,26 +164,17 @@ export function OilSpillView() { // 역추적 API 데이터 const [backtrackConditions, setBacktrackConditions] = useState({ estimatedSpillTime: '', analysisRange: '±12시간', searchRadius: '10 NM', - spillLocation: { lat: 34.7312, lon: 127.6845 }, totalVessels: 0, + spillLocation: { lat: 37.3883, lon: 126.6435 }, totalVessels: 0, }) const [replayShips, setReplayShips] = useState([]) const [collisionEvent, setCollisionEvent] = useState(null) // 재계산 상태 const [recalcModalOpen, setRecalcModalOpen] = useState(false) + const [currentExecSn, setCurrentExecSn] = useState(null) + const [simulationSummary, setSimulationSummary] = useState(null) + const { data: simStatus } = useSimulationStatus(currentExecSn) - // 분석 탭 초기 진입 시 기본 데모 자동 표시 - useEffect(() => { - if (activeSubTab === 'analysis' && oilTrajectory.length === 0 && !selectedAnalysis) { - const models = Array.from(selectedModels.size > 0 ? selectedModels : new Set(['KOSPS'])) - const demoTrajectory = generateDemoTrajectory(incidentCoord, models, predictionTime) - setOilTrajectory(demoTrajectory) - const demoBooms = generateAIBoomLines(demoTrajectory, incidentCoord, algorithmSettings) - setBoomLines(demoBooms) - setSensitiveResources(DEMO_SENSITIVE_RESOURCES) - } - // eslint-disable-next-line react-hooks/exhaustive-deps - }, [activeSubTab]) const handleToggleLayer = (layerId: string, enabled: boolean) => { setEnabledLayers(prev => { @@ -204,7 +207,7 @@ export function OilSpillView() { estimatedSpillTime: bt.estSpilDtm ? new Date(bt.estSpilDtm).toLocaleString('ko-KR', { month: '2-digit', day: '2-digit', hour: '2-digit', minute: '2-digit' }) : '', analysisRange: bt.anlysRange || '±12시간', searchRadius: bt.srchRadiusNm ? `${bt.srchRadiusNm} NM` : '10 NM', - spillLocation: { lat: bt.lat || incidentCoord.lat, lon: bt.lon || incidentCoord.lon }, + spillLocation: { lat: bt.lat || incidentCoord?.lat || 0, lon: bt.lon || incidentCoord?.lon || 0 }, totalVessels: bt.totalVessels || 0, }) setBacktrackPhase('results') @@ -225,7 +228,7 @@ export function OilSpillView() { setBacktrackModalOpen(true) setBacktrackConditions(prev => ({ ...prev, - spillLocation: incidentCoord, + spillLocation: incidentCoord ?? 
prev.spillLocation, })) if (selectedAnalysis) { loadBacktrackData(selectedAnalysis.acdntSn) @@ -236,6 +239,7 @@ export function OilSpillView() { } const handleRunBacktrackAnalysis = async () => { + if (!incidentCoord) return setBacktrackPhase('analyzing') try { if (selectedAnalysis) { @@ -290,10 +294,6 @@ export function OilSpillView() { // 역추적 리플레이 애니메이션 useEffect(() => { if (!isReplayPlaying) return - if (replayFrame >= TOTAL_REPLAY_FRAMES) { - setIsReplayPlaying(false) - return - } const interval = setInterval(() => { setReplayFrame(prev => { const next = prev + 1 @@ -305,13 +305,127 @@ export function OilSpillView() { }) }, 50 / replaySpeed) return () => clearInterval(interval) - }, [isReplayPlaying, replayFrame, replaySpeed]) + }, [isReplayPlaying, replaySpeed]) + + // flyTo 완료 후 재생 대기 플래그 + const pendingPlayRef = useRef(false) + + // 항공 이미지 분석 완료 후 자동실행 플래그 + const pendingAutoRunRef = useRef(false) + + // 마운트 시 이미지 분석 시그널 확인 (유출유면적분석 탭에서 이동한 경우) + useEffect(() => { + const pending = consumePendingImageAnalysis() + if (!pending) return + handleImageAnalysisResult({ + acdntSn: pending.acdntSn, + lat: pending.lat, + lon: pending.lon, + oilType: pending.oilType, + area: pending.area, + volume: pending.volume, + fileId: pending.fileId, + occurredAt: pending.occurredAt, + }) + if (pending.autoRun) pendingAutoRunRef.current = true + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []) + + // incidentCoord 업데이트 후 시뮬레이션 자동실행 + useEffect(() => { + if (pendingAutoRunRef.current && incidentCoord) { + pendingAutoRunRef.current = false + handleRunSimulation() + } + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [incidentCoord]) + + const handleFlyEnd = useCallback(() => { + setFlyToCoord(undefined) + if (pendingPlayRef.current) { + pendingPlayRef.current = false + setIsPlaying(true) + } + }, []) + + // 시뮬레이션 폴링 결과 처리 + useEffect(() => { + if (!simStatus) return; + if (simStatus.status === 'DONE' && simStatus.trajectory) { + // eslint-disable-next-line react-hooks/set-state-in-effect + setOilTrajectory(simStatus.trajectory); + setSimulationSummary(simStatus.summary ?? null); + setCenterPoints(simStatus.centerPoints ?? []); + setWindData(simStatus.windData ?? []); + setHydrData(simStatus.hydrData ?? []); + setIsRunningSimulation(false); + setCurrentExecSn(null); + // AI 방어선 자동 생성 + if (incidentCoord) { + const booms = generateAIBoomLines(simStatus.trajectory, incidentCoord, algorithmSettings); + setBoomLines(booms); + } + setSensitiveResources(DEMO_SENSITIVE_RESOURCES); + // 새 시뮬레이션 완료 시 flyTo 없으므로 즉시 재생 + setCurrentStep(0); + setIsPlaying(true); + } + if (simStatus.status === 'ERROR') { + setIsRunningSimulation(false); + setCurrentExecSn(null); + } + }, [simStatus, incidentCoord, algorithmSettings]); + + // trajectory 변경 시 플레이어 스텝 초기화 (재생은 각 경로에서 별도 처리) + useEffect(() => { + if (oilTrajectory.length > 0) { + // eslint-disable-next-line react-hooks/set-state-in-effect + setCurrentStep(0); + } + }, [oilTrajectory.length]); + + // 플레이어 재생 애니메이션 (1x = 1초/스텝, 2x = 0.5초/스텝, 4x = 0.25초/스텝) + const timeSteps = useMemo(() => { + if (oilTrajectory.length === 0) return []; + const unique = [...new Set(oilTrajectory.map(p => p.time))].sort((a, b) => a - b); + return unique; + }, [oilTrajectory]); + + const maxTime = timeSteps[timeSteps.length - 1] ?? 
predictionTime; + + useEffect(() => { + if (!isPlaying || timeSteps.length === 0) return; + if (currentStep >= maxTime) { + // eslint-disable-next-line react-hooks/set-state-in-effect + setIsPlaying(false); + return; + } + const ms = 1000 / playSpeed; + const id = setInterval(() => { + setCurrentStep(prev => { + const idx = timeSteps.indexOf(prev); + if (idx < 0 || idx >= timeSteps.length - 1) { + setIsPlaying(false); + return timeSteps[timeSteps.length - 1]; + } + return timeSteps[idx + 1]; + }); + }, ms); + return () => clearInterval(id); + }, [isPlaying, currentStep, playSpeed, timeSteps, maxTime]); // 분석 목록에서 사고명 클릭 시 const handleSelectAnalysis = async (analysis: Analysis) => { + setIsPlaying(false) + setCurrentStep(0) setSelectedAnalysis(analysis) + setCenterPoints([]) + if (analysis.occurredAt) { + setAccidentTime(analysis.occurredAt.slice(0, 16)) + } if (analysis.lon != null && analysis.lat != null) { setIncidentCoord({ lon: analysis.lon, lat: analysis.lat }) + setFlyToCoord({ lon: analysis.lon, lat: analysis.lat }) } // 유종 매핑 const oilTypeMap: Record = { @@ -336,16 +450,49 @@ export function OilSpillView() { // 분석 화면으로 전환 setActiveSubTab('analysis') - // 데모 궤적 자동 생성 (화면 진입 즉시 시각화) const coord = (analysis.lon != null && analysis.lat != null) ? { lon: analysis.lon, lat: analysis.lat } : incidentCoord const demoModels = Array.from(models.size > 0 ? models : new Set(['KOSPS'])) + + // OpenDrift 완료된 경우 실제 궤적 로드, 없으면 데모로 fallback + if (analysis.opendriftStatus === 'completed') { + try { + const { trajectory, summary, centerPoints: cp, windData: wd, hydrData: hd } = await fetchAnalysisTrajectory(analysis.acdntSn) + if (trajectory && trajectory.length > 0) { + setOilTrajectory(trajectory) + if (summary) setSimulationSummary(summary) + setCenterPoints(cp ?? []) + setWindData(wd ?? []) + setHydrData(hd ?? 
[]) + const booms = generateAIBoomLines(trajectory, coord, algorithmSettings) + setBoomLines(booms) + setSensitiveResources(DEMO_SENSITIVE_RESOURCES) + // incidentCoord가 변경된 경우 flyTo 완료 후 재생, 그렇지 않으면 즉시 재생 + if (analysis.lon !== incidentCoord?.lon || analysis.lat !== incidentCoord?.lat) { + pendingPlayRef.current = true + } else { + setIsPlaying(true) + } + return + } + } catch (err) { + console.error('[prediction] trajectory 로딩 실패, 데모로 fallback:', err) + } + } + + // 데모 궤적 생성 (fallback) const demoTrajectory = generateDemoTrajectory(coord, demoModels, parseInt(analysis.duration) || 48) setOilTrajectory(demoTrajectory) const demoBooms = generateAIBoomLines(demoTrajectory, coord, algorithmSettings) setBoomLines(demoBooms) setSensitiveResources(DEMO_SENSITIVE_RESOURCES) + // incidentCoord가 변경된 경우 flyTo 완료 후 재생, 그렇지 않으면 즉시 재생 + if (analysis.lon !== incidentCoord?.lon || analysis.lat !== incidentCoord?.lat) { + pendingPlayRef.current = true + } else { + setIsPlaying(true) + } } const handleMapClick = (lon: number, lat: number) => { @@ -357,57 +504,176 @@ export function OilSpillView() { } } + const handleImageAnalysisResult = useCallback((result: ImageAnalyzeResult) => { + setIncidentCoord({ lat: result.lat, lon: result.lon }) + setFlyToCoord({ lat: result.lat, lon: result.lon }) + setAccidentTime(result.occurredAt.slice(0, 16)) + setOilType(result.oilType) + setSpillAmount(parseFloat(result.volume.toFixed(4))) + setSpillUnit('kL') + setSelectedAnalysis({ + acdntSn: result.acdntSn, + acdntNm: '', + occurredAt: result.occurredAt, + analysisDate: '', + requestor: '', + duration: '48', + oilType: result.oilType, + volume: result.volume, + location: '', + lat: result.lat, + lon: result.lon, + kospsStatus: 'pending', + poseidonStatus: 'pending', + opendriftStatus: 'pending', + backtrackStatus: 'pending', + analyst: '', + officeName: '', + }) + }, []) + const handleRunSimulation = async () => { - if (selectedModels.size === 0) return - setIsRunningSimulation(true) + // incidentName이 있으면 직접 입력 모드 — 기존 selectedAnalysis.acdntSn 무시하고 새 사고 생성 + const isDirectInput = incidentName.trim().length > 0; + const existingAcdntSn = isDirectInput + ? undefined + : (selectedAnalysis?.acdntSn ?? analysisDetail?.acdnt?.acdntSn); + // 선택 모드인데 사고도 없으면 실행 불가, 직접 입력 모드인데 사고명 없으면 실행 불가 + if (!isDirectInput && !existingAcdntSn) { + return; + } + if (!incidentCoord) { + return; + } + + setIsRunningSimulation(true); + setSimulationSummary(null); try { - const models = Array.from(selectedModels) - const results = await Promise.all( - models.map(async (model) => { - const { data } = await api.post<{ trajectory: Array<{ lat: number; lon: number; time: number; particle?: number }> }>('/simulation/run', { - model, - lat: incidentCoord.lat, - lon: incidentCoord.lon, - duration_hours: predictionTime, - oil_type: oilType, - spill_amount: spillAmount, - spill_type: spillType, - }) - return data.trajectory.map(p => ({ ...p, model })) - }) - ) + const payload: Record = { + acdntSn: existingAcdntSn, + lat: incidentCoord.lat, + lon: incidentCoord.lon, + runTime: predictionTime, + matTy: oilType, + matVol: spillAmount, + spillTime: spillType === '연속' ? predictionTime : 0, + startTime: accidentTime + ? 
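+          // accidentTime is a minute-precision local (KST) value; per
+          // docs/PREDICTION-GUIDE.md the FastAPI side converts startTime to UTC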
`${accidentTime}:00` + : analysisDetail?.acdnt?.occurredAt, + }; - setOilTrajectory(results.flat()) + // 직접 입력 모드: 백엔드에서 ACDNT + SPIL_DATA 생성에 필요한 필드 추가 + if (isDirectInput) { + payload.acdntNm = incidentName.trim(); + payload.spillUnit = spillUnit; + payload.spillTypeCd = spillType; + } + + const { data } = await api.post('/simulation/run', payload); + setCurrentExecSn(data.execSn); + + // 직접 입력으로 신규 생성된 경우: selectedAnalysis 갱신 + incidentName 초기화 + if (data.acdntSn && isDirectInput) { + setSelectedAnalysis({ + acdntSn: data.acdntSn, + acdntNm: incidentName.trim(), + occurredAt: accidentTime ? `${accidentTime}:00` : '', + analysisDate: new Date().toISOString(), + requestor: '', + duration: String(predictionTime), + oilType, + volume: spillAmount, + location: '', + lat: incidentCoord.lat, + lon: incidentCoord.lon, + kospsStatus: 'pending', + poseidonStatus: 'pending', + opendriftStatus: 'pending', + backtrackStatus: 'pending', + analyst: '', + officeName: '', + } as Analysis); + // 다음 실행 시 동일 사고 재생성 방지 — 이후에는 selectedAnalysis.acdntSn 사용 + setIncidentName(''); + } + // setIsRunningSimulation(false)는 폴링 결과 useEffect에서 처리 } catch { - // 백엔드 미구현 — 클라이언트 데모 궤적 fallback - console.info('[prediction] 서버 시뮬레이션 미구현, 데모 궤적 생성') - const models = Array.from(selectedModels) - const demoTrajectory = generateDemoTrajectory(incidentCoord, models, predictionTime) - setOilTrajectory(demoTrajectory) - - // AI 방어선 자동 생성 - const demoBooms = generateAIBoomLines(demoTrajectory, incidentCoord, algorithmSettings) - setBoomLines(demoBooms) - - // 민감자원 로드 - setSensitiveResources(DEMO_SENSITIVE_RESOURCES) - } finally { - setIsRunningSimulation(false) + setIsRunningSimulation(false); + // 503 등 에러 시 상태 복원 (에러 메시지 표시는 향후 토스트로 처리) } } + const handleOpenReport = () => { + const OIL_TYPE_CODE: Record = { + '벙커C유': 'BUNKER_C', '경유': 'DIESEL', '원유': 'CRUDE_OIL', '윤활유': 'LUBE_OIL', + }; + const accidentName = + selectedAnalysis?.acdntNm || + analysisDetail?.acdnt?.acdntNm || + incidentName || + '(미입력)'; + const occurTime = + selectedAnalysis?.occurredAt || + analysisDetail?.acdnt?.occurredAt || + accidentTime || + ''; + const wx = analysisDetail?.weather?.[0] ?? null; + + const payload: OilReportPayload = { + incident: { + name: accidentName, + occurTime, + location: selectedAnalysis?.location || analysisDetail?.acdnt?.location || '', + lat: incidentCoord?.lat ?? selectedAnalysis?.lat ?? null, + lon: incidentCoord?.lon ?? selectedAnalysis?.lon ?? null, + pollutant: OIL_TYPE_CODE[oilType] || oilType, + spillAmount: `${spillAmount} ${spillUnit}`, + shipName: analysisDetail?.vessels?.[0]?.vesselNm || '', + }, + pollution: { + spillAmount: `${spillAmount.toFixed(2)} ${spillUnit}`, + weathered: simulationSummary ? `${simulationSummary.weatheredVolume.toFixed(2)} m³` : '—', + seaRemain: simulationSummary ? `${simulationSummary.remainingVolume.toFixed(2)} m³` : '—', + pollutionArea: simulationSummary ? `${simulationSummary.pollutionArea.toFixed(2)} km²` : '—', + coastAttach: simulationSummary ? `${simulationSummary.beachedVolume.toFixed(2)} m³` : '—', + coastLength: simulationSummary ? `${simulationSummary.pollutionCoastLength.toFixed(2)} km` : '—', + oilType: OIL_TYPE_CODE[oilType] || oilType, + }, + weather: wx + ? 
{ windDir: wx.wind, windSpeed: wx.wind, waveHeight: wx.wave, temp: wx.temp }
+        : null,
+      spread: { kosps: '—', openDrift: '—', poseidon: '—' },
+      coastal: {
+        firstTime: (() => {
+          const beachedTimes = oilTrajectory.filter(p => p.stranded === 1).map(p => p.time);
+          if (beachedTimes.length === 0) return null;
+          // p.time is an hour offset from the simulation start, not an epoch
+          // value — anchor it to occurTime instead of feeding it to Date() raw
+          const base = occurTime ? new Date(occurTime) : new Date();
+          const d = new Date(base.getTime() + Math.min(...beachedTimes) * 3_600_000);
+          return `${String(d.getHours()).padStart(2, '0')}:${String(d.getMinutes()).padStart(2, '0')}`;
+        })(),
+      },
+      hasSimulation: simulationSummary !== null,
+    };
+
+    setOilReportPayload(payload);
+    setReportGenCategory(0);
+    navigateToTab('reports', 'generate');
+  };
+
   return (
-
+
{/* Left Sidebar */} {activeSubTab === 'analysis' && ( setIsSelectingLocation(true)} + isSelectingLocation={isSelectingLocation} + onMapSelectClick={() => setIsSelectingLocation(prev => !prev)} onRunSimulation={handleRunSimulation} isRunningSimulation={isRunningSimulation} selectedModels={selectedModels} @@ -420,6 +686,10 @@ export function OilSpillView() { onOilTypeChange={setOilType} spillAmount={spillAmount} onSpillAmountChange={setSpillAmount} + incidentName={incidentName} + onIncidentNameChange={setIncidentName} + spillUnit={spillUnit} + onSpillUnitChange={setSpillUnit} boomLines={boomLines} onBoomLinesChange={setBoomLines} oilTrajectory={oilTrajectory} @@ -435,6 +705,7 @@ export function OilSpillView() { onLayerOpacityChange={setLayerOpacity} layerBrightness={layerBrightness} onLayerBrightnessChange={setLayerBrightness} + onImageAnalysisResult={handleImageAnalysisResult} /> )} @@ -450,7 +721,8 @@ export function OilSpillView() { <> 0 ? { + centerPoints={centerPoints} + windData={windData} + hydrData={hydrData} + flyToTarget={flyToTarget} + fitBoundsTarget={fitBoundsTarget} + onIncidentFlyEnd={handleFlyEnd} + externalCurrentTime={oilTrajectory.length > 0 ? currentStep : undefined} + backtrackReplay={isReplayActive && replayShips.length > 0 && incidentCoord ? { isActive: true, ships: replayShips, - collisionEvent: collisionEvent || undefined, + collisionEvent: collisionEvent ?? null, replayFrame, totalFrames: TOTAL_REPLAY_FRAMES, incidentCoord, @@ -472,148 +751,166 @@ export function OilSpillView() { /> {/* 타임라인 플레이어 (리플레이 비활성 시) */} - {!isReplayActive &&
- {/* 컨트롤 버튼 */} -
- {[ - { icon: '⏮', action: () => setTimelinePosition(0) }, - { icon: '◀', action: () => setTimelinePosition(Math.max(0, timelinePosition - 100 / 12)) }, - ].map((btn, i) => ( - - ))} - - {[ - { icon: '▶▶', action: () => setTimelinePosition(Math.min(100, timelinePosition + 100 / 12)) }, - { icon: '⏭', action: () => setTimelinePosition(100) }, - ].map((btn, i) => ( - - ))} -
- -
- - {/* 타임라인 슬라이더 */} -
- {/* 시간 라벨 */} -
- {['0h', '6h', '12h', '18h', '24h', '36h', '48h', '60h', '72h'].map((label, i) => { - const pos = [0, 8.33, 16.67, 25, 33.33, 50, 66.67, 83.33, 100][i] - const isActive = Math.abs(timelinePosition - pos) < 5 - return ( - setTimelinePosition(pos)}>{label} - ) - })} -
- - {/* 슬라이더 트랙 */} -
- {/* 트랙 레일 */} -
{ - const rect = e.currentTarget.getBoundingClientRect() - setTimelinePosition(Math.max(0, Math.min(100, ((e.clientX - rect.left) / rect.width) * 100))) - }} - > - {/* 진행 바 */} -
- {/* 주요 마커 */} - {[0, 16.67, 33.33, 50, 66.67, 83.33, 100].map((pos) => ( -
+ {!isReplayActive && (() => { + const progressPct = maxTime > 0 ? (currentStep / maxTime) * 100 : 0; + // 동적 라벨: 스텝 수에 따라 균등 분배 + const visibleLabels: number[] = (() => { + if (timeSteps.length === 0) return [0]; + if (timeSteps.length <= 8) return timeSteps; + const interval = Math.ceil(timeSteps.length / 7); + return timeSteps.filter((_, i) => i % interval === 0 || i === timeSteps.length - 1); + })(); + return ( +
+ {/* 컨트롤 버튼 */} +
+ {[ + { icon: '⏮', action: () => { setCurrentStep(timeSteps[0] ?? 0); setIsPlaying(false); } }, + { icon: '◀', action: () => { const idx = timeSteps.indexOf(currentStep); if (idx > 0) setCurrentStep(timeSteps[idx - 1]); } }, + ].map((btn, i) => ( + ))} - {/* 보조 마커 */} - {[8.33, 25].map((pos) => ( -
- ))} - {/* 방어선 설치 이벤트 마커 */} - {boomLines.length > 0 && [ - { pos: 4.2, label: '1차 방어선 설치 (+3h)' }, - { pos: 8.3, label: '2차 방어선 설치 (+6h)' }, - { pos: 12.5, label: '3차 방어선 설치 (+9h)' }, - ].slice(0, boomLines.length).map((bm, i) => ( -
🛡
+ + {[ + { icon: '▶▶', action: () => { const idx = timeSteps.indexOf(currentStep); if (idx < timeSteps.length - 1) setCurrentStep(timeSteps[idx + 1]); } }, + { icon: '⏭', action: () => { setCurrentStep(maxTime); setIsPlaying(false); } }, + ].map((btn, i) => ( + ))} +
+
- {/* 드래그 핸들 */} -
-
-
- {/* 시간 정보 */} -
-
- +{Math.round(timelinePosition * 72 / 100)}h — {(() => { - const d = new Date(); d.setHours(d.getHours() + Math.round(timelinePosition * 72 / 100)) - return `${String(d.getMonth() + 1).padStart(2, '0')}/${String(d.getDate()).padStart(2, '0')} ${String(d.getHours()).padStart(2, '0')}:${String(d.getMinutes()).padStart(2, '0')} KST` - })()} -
-
- {[ - { label: '풍화율', value: `${Math.min(99, Math.round(timelinePosition * 0.4))}%` }, - { label: '면적', value: `${(timelinePosition * 0.08).toFixed(1)} km²` }, - { label: '차단율', value: boomLines.length > 0 ? `${Math.min(95, 70 + Math.round(timelinePosition * 0.2))}%` : '—', color: 'var(--boom)' }, - ].map((s, i) => ( -
- {s.label} - {s.value} + {/* 타임라인 슬라이더 */} +
+ {/* 동적 시간 라벨 */} +
+ {visibleLabels.map(t => { + const pos = maxTime > 0 ? (t / maxTime) * 100 : 0; + const isActive = t === currentStep; + return ( + setCurrentStep(t)}>{t}h + ) + })}
- ))} + + {/* 슬라이더 트랙 */} +
+
{ + if (timeSteps.length === 0) return; + const rect = e.currentTarget.getBoundingClientRect(); + const pct = (e.clientX - rect.left) / rect.width; + const targetTime = pct * maxTime; + const closest = timeSteps.reduce((a, b) => + Math.abs(b - targetTime) < Math.abs(a - targetTime) ? b : a + ); + setCurrentStep(closest); + }} + > + {/* 진행 바 */} +
+ {/* 스텝 마커 (각 타임스텝 위치에 틱 표시) */} + {timeSteps.map(t => { + const pos = maxTime > 0 ? (t / maxTime) * 100 : 0; + return ( +
+ ); + })} + {/* 방어선 설치 이벤트 마커 */} + {boomLines.length > 0 && [ + { pos: 4.2, label: '1차 방어선 설치 (+3h)' }, + { pos: 8.3, label: '2차 방어선 설치 (+6h)' }, + { pos: 12.5, label: '3차 방어선 설치 (+9h)' }, + ].slice(0, boomLines.length).map((bm, i) => ( +
🛡
+ ))} +
+ {/* 드래그 핸들 */} +
+
+
+ + {/* 시간 정보 */} +
+
+ +{currentStep}h — {(() => { + const d = new Date(); d.setHours(d.getHours() + currentStep); + return `${String(d.getMonth() + 1).padStart(2, '0')}/${String(d.getDate()).padStart(2, '0')} ${String(d.getHours()).padStart(2, '0')}:${String(d.getMinutes()).padStart(2, '0')} KST`; + })()} +
+
+ {[ + { label: '풍화율', value: `${Math.min(99, Math.round(progressPct * 0.4))}%` }, + { label: '면적', value: `${(progressPct * 0.08).toFixed(1)} km²` }, + { label: '차단율', value: boomLines.length > 0 ? `${Math.min(95, 70 + Math.round(progressPct * 0.2))}%` : '—', color: 'var(--boom)' }, + ].map((s, i) => ( +
+ {s.label} + {s.value} +
+ ))} +
+
-
-
} + ); + })()} {/* 역추적 리플레이 바 */} {isReplayActive && ( @@ -627,7 +924,7 @@ export function OilSpillView() { onSpeedChange={setReplaySpeed} onClose={handleCloseReplay} replayShips={replayShips} - collisionEvent={collisionEvent || undefined} + collisionEvent={collisionEvent} /> )} @@ -635,7 +932,15 @@ export function OilSpillView() {
{/* Right Panel */} - {activeSubTab === 'analysis' && setRecalcModalOpen(true)} onOpenReport={() => { setReportGenCategory(0); navigateToTab('reports', 'generate') }} detail={analysisDetail} />} + {activeSubTab === 'analysis' && setRecalcModalOpen(true)} onOpenReport={handleOpenReport} detail={analysisDetail} summary={simulationSummary} />} + + {/* 확산 예측 실행 중 로딩 오버레이 */} + {isRunningSimulation && ( + + )} {/* 재계산 모달 */} { setOilType(params.oilType) diff --git a/frontend/src/tabs/prediction/components/PredictionInputSection.tsx b/frontend/src/tabs/prediction/components/PredictionInputSection.tsx index 854a010..aaffef1 100644 --- a/frontend/src/tabs/prediction/components/PredictionInputSection.tsx +++ b/frontend/src/tabs/prediction/components/PredictionInputSection.tsx @@ -1,14 +1,19 @@ -import { useState } from 'react' +import { useState, useRef } from 'react' import { decimalToDMS } from '@common/utils/coordinates' import { ComboBox } from '@common/components/ui/ComboBox' import { ALL_MODELS } from './OilSpillView' import type { PredictionModel } from './OilSpillView' +import { analyzeImage } from '../services/predictionApi' +import type { ImageAnalyzeResult } from '../services/predictionApi' interface PredictionInputSectionProps { expanded: boolean onToggle: () => void - incidentCoord: { lon: number; lat: number } + accidentTime: string + onAccidentTimeChange: (time: string) => void + incidentCoord: { lon: number; lat: number } | null onCoordChange: (coord: { lon: number; lat: number }) => void + isSelectingLocation: boolean onMapSelectClick: () => void onRunSimulation: () => void isRunningSimulation: boolean @@ -22,13 +27,21 @@ interface PredictionInputSectionProps { onOilTypeChange: (type: string) => void spillAmount: number onSpillAmountChange: (amount: number) => void + incidentName: string + onIncidentNameChange: (name: string) => void + spillUnit: string + onSpillUnitChange: (unit: string) => void + onImageAnalysisResult?: (result: ImageAnalyzeResult) => void } const PredictionInputSection = ({ expanded, onToggle, + accidentTime, + onAccidentTimeChange, incidentCoord, onCoordChange, + isSelectingLocation, onMapSelectClick, onRunSimulation, isRunningSimulation, @@ -42,26 +55,57 @@ const PredictionInputSection = ({ onOilTypeChange, spillAmount, onSpillAmountChange, + incidentName, + onIncidentNameChange, + spillUnit, + onSpillUnitChange, + onImageAnalysisResult, }: PredictionInputSectionProps) => { const [inputMode, setInputMode] = useState<'direct' | 'upload'>('direct') - const [uploadedImage, setUploadedImage] = useState(null) - const [uploadedFileName, setUploadedFileName] = useState('') + const [uploadedFile, setUploadedFile] = useState(null) + const [isAnalyzing, setIsAnalyzing] = useState(false) + const [analyzeError, setAnalyzeError] = useState(null) + const [analyzeResult, setAnalyzeResult] = useState(null) + const fileInputRef = useRef(null) - const handleImageUpload = (e: React.ChangeEvent) => { - const file = e.target.files?.[0] - if (file) { - setUploadedFileName(file.name) - const reader = new FileReader() - reader.onload = (event) => { - setUploadedImage(event.target?.result as string) - } - reader.readAsDataURL(file) - } + const handleFileSelect = (e: React.ChangeEvent) => { + const file = e.target.files?.[0] ?? 
null + setUploadedFile(file) + setAnalyzeError(null) + setAnalyzeResult(null) } - const removeUploadedImage = () => { - setUploadedImage(null) - setUploadedFileName('') + const handleRemoveFile = () => { + setUploadedFile(null) + setAnalyzeError(null) + setAnalyzeResult(null) + if (fileInputRef.current) fileInputRef.current.value = '' + } + + const handleAnalyze = async () => { + if (!uploadedFile) return + setIsAnalyzing(true) + setAnalyzeError(null) + try { + const result = await analyzeImage(uploadedFile) + setAnalyzeResult(result) + onImageAnalysisResult?.(result) + } catch (err: unknown) { + if (err && typeof err === 'object' && 'response' in err) { + const res = (err as { response?: { data?: { error?: string } } }).response + if (res?.data?.error === 'GPS_NOT_FOUND') { + setAnalyzeError('GPS 정보가 없는 이미지입니다') + return + } + if (res?.data?.error === 'TIMEOUT') { + setAnalyzeError('분석 서버 응답 없음 (시간 초과)') + return + } + } + setAnalyzeError('이미지 분석 중 오류가 발생했습니다') + } finally { + setIsAnalyzing(false) + } } return ( @@ -88,8 +132,7 @@ const PredictionInputSection = ({ name="prdType" checked={inputMode === 'direct'} onChange={() => setInputMode('direct')} - className="m-0 w-[11px] h-[11px]" - className="accent-[var(--cyan)]" + className="accent-[var(--cyan)] m-0 w-[11px] h-[11px]" /> 직접 입력 @@ -99,8 +142,7 @@ const PredictionInputSection = ({ name="prdType" checked={inputMode === 'upload'} onChange={() => setInputMode('upload')} - className="m-0 w-[11px] h-[11px]" - className="accent-[var(--cyan)]" + className="accent-[var(--cyan)] m-0 w-[11px] h-[11px]" /> 이미지 업로드 @@ -109,43 +151,23 @@ const PredictionInputSection = ({ {/* Direct Input Mode */} {inputMode === 'direct' && ( <> - - + onIncidentNameChange(e.target.value)} + /> + )} {/* Image Upload Mode */} {inputMode === 'upload' && ( <> - - {}} - options={[ - { value: '', label: '여수 유조선 충돌 (INC-0042)' }, - { value: 'INC-0042', label: '여수 유조선 충돌 (INC-0042)' } - ]} - placeholder="사고 선택" - /> - - {/* Upload Success Message */} - {uploadedImage && ( -
- - 내 이미지가 업로드됨 -
- )} - - {/* File Upload Area */} - {!uploadedImage ? ( - ) : ( -
- 📄 {uploadedFileName || 'example_plot_0.gif'} +
+ 📄 {uploadedFile.name}
)} - {/* Dropdowns */} -
- {}} - options={[ - { value: '', label: '유출회사' }, - { value: 'company1', label: '회사A' }, - { value: 'company2', label: '회사B' } - ]} - placeholder="유출회사" - /> - {}} - options={[ - { value: '', label: '예상시각' }, - { value: '09:00', label: '09:00' }, - { value: '12:00', label: '12:00' } - ]} - placeholder="예상시각" - /> -
+ {/* 분석 실행 버튼 */} + + + {/* 에러 메시지 */} + {analyzeError && ( +
+ ⚠ {analyzeError} +
+ )} + + {/* 분석 완료 메시지 */} + {analyzeResult && ( +
+ ✓ 분석 완료
+ + 위도 {analyzeResult.lat.toFixed(4)} / 경도 {analyzeResult.lon.toFixed(4)}
+ 유종: {analyzeResult.oilType} / 면적: {analyzeResult.area.toFixed(1)} m² +
+
+ )} )} + {/* 사고 발생 시각 */} +
+ + onAccidentTimeChange(e.target.value)} + style={{ colorScheme: 'dark' }} + /> +
+ {/* Coordinates + Map Button */}
@@ -230,7 +282,7 @@ const PredictionInputSection = ({ value={incidentCoord?.lat ?? ''} onChange={(e) => { const value = e.target.value === '' ? 0 : parseFloat(e.target.value) - onCoordChange({ ...incidentCoord, lat: isNaN(value) ? 0 : value }) + onCoordChange({ lon: incidentCoord?.lon ?? 0, lat: isNaN(value) ? 0 : value }) }} placeholder="위도°" /> @@ -241,19 +293,21 @@ const PredictionInputSection = ({ value={incidentCoord?.lon ?? ''} onChange={(e) => { const value = e.target.value === '' ? 0 : parseFloat(e.target.value) - onCoordChange({ ...incidentCoord, lon: isNaN(value) ? 0 : value }) + onCoordChange({ lat: incidentCoord?.lat ?? 0, lon: isNaN(value) ? 0 : value }) }} placeholder="경도°" /> - +
{/* 도분초 표시 */} {incidentCoord && !isNaN(incidentCoord.lat) && !isNaN(incidentCoord.lon) && ( -
+
{decimalToDMS(incidentCoord.lat, true)} / {decimalToDMS(incidentCoord.lon, false)}
)} @@ -299,8 +353,8 @@ const PredictionInputSection = ({ /> {}} + value={spillUnit} + onChange={onSpillUnitChange} options={[ { value: 'kL', label: 'kL' }, { value: 'ton', label: 'Ton' }, @@ -321,19 +375,6 @@ const PredictionInputSection = ({ />
- {/* Image Analysis Note (Upload Mode Only) */} - {inputMode === 'upload' && uploadedImage && ( -
- 📊 이미지 내 확산경로를 분석하였습니다. 각 방제요소 가이드 참고하세요. -
- )} - {/* Divider */}
diff --git a/frontend/src/tabs/prediction/components/RightPanel.tsx b/frontend/src/tabs/prediction/components/RightPanel.tsx index 64439cb..ac1709b 100755 --- a/frontend/src/tabs/prediction/components/RightPanel.tsx +++ b/frontend/src/tabs/prediction/components/RightPanel.tsx @@ -1,7 +1,7 @@ import { useState } from 'react' -import type { PredictionDetail } from '../services/predictionApi' +import type { PredictionDetail, SimulationSummary } from '../services/predictionApi' -export function RightPanel({ onOpenBacktrack, onOpenRecalc, onOpenReport, detail }: { onOpenBacktrack?: () => void; onOpenRecalc?: () => void; onOpenReport?: () => void; detail?: PredictionDetail | null }) { +export function RightPanel({ onOpenBacktrack, onOpenRecalc, onOpenReport, detail, summary }: { onOpenBacktrack?: () => void; onOpenRecalc?: () => void; onOpenReport?: () => void; detail?: PredictionDetail | null; summary?: SimulationSummary | null }) { const vessel = detail?.vessels?.[0] const vessel2 = detail?.vessels?.[1] const spill = detail?.spill @@ -44,11 +44,11 @@ export function RightPanel({ onOpenBacktrack, onOpenRecalc, onOpenReport, detail
- - - + + +
- +
diff --git a/frontend/src/tabs/prediction/components/SimulationLoadingOverlay.tsx b/frontend/src/tabs/prediction/components/SimulationLoadingOverlay.tsx new file mode 100644 index 0000000..4130de0 --- /dev/null +++ b/frontend/src/tabs/prediction/components/SimulationLoadingOverlay.tsx @@ -0,0 +1,123 @@ +interface SimulationLoadingOverlayProps { + status: 'PENDING' | 'RUNNING'; + progress?: number; +} + +const SimulationLoadingOverlay = ({ status, progress }: SimulationLoadingOverlayProps) => { + const displayProgress = progress ?? 0; + const statusText = status === 'PENDING' ? '모델 초기화 중...' : '입자 추적 계산 중...'; + + return ( +
+
+ {/* 아이콘 + 제목 */} +
+
+ + + +
+
+
+ 확산 예측 분석 중 +
+
+ {statusText} +
+
+
+ + {/* 진행률 바 */} +
+
+
+
+
+ + {status === 'PENDING' ? '대기 중' : '분석 진행 중'} + + + {status === 'PENDING' ? '—' : `${displayProgress}%`} + +
+
+ + {/* 안내 문구 */} +
+ OpenDrift 모델로 유류 확산을 시뮬레이션하고 있습니다. +
+ 완료되면 자동으로 결과가 표시됩니다. +
+
+
+ ); +}; + +export default SimulationLoadingOverlay; diff --git a/frontend/src/tabs/prediction/components/leftPanelTypes.ts b/frontend/src/tabs/prediction/components/leftPanelTypes.ts index a7ab8a9..22fbe14 100644 --- a/frontend/src/tabs/prediction/components/leftPanelTypes.ts +++ b/frontend/src/tabs/prediction/components/leftPanelTypes.ts @@ -1,13 +1,17 @@ import type { PredictionModel } from './OilSpillView' import type { BoomLine, BoomLineCoord, AlgorithmSettings, ContainmentResult } from '@common/types/boomLine' import type { Analysis } from './AnalysisListTable' +import type { ImageAnalyzeResult } from '../services/predictionApi' export interface LeftPanelProps { selectedAnalysis?: Analysis | null enabledLayers: Set onToggleLayer: (layerId: string, enabled: boolean) => void - incidentCoord: { lon: number; lat: number } + accidentTime: string + onAccidentTimeChange: (time: string) => void + incidentCoord: { lon: number; lat: number } | null onCoordChange: (coord: { lon: number; lat: number }) => void + isSelectingLocation: boolean onMapSelectClick: () => void onRunSimulation: () => void isRunningSimulation: boolean @@ -21,6 +25,10 @@ export interface LeftPanelProps { onOilTypeChange: (type: string) => void spillAmount: number onSpillAmountChange: (amount: number) => void + incidentName: string + onIncidentNameChange: (name: string) => void + spillUnit: string + onSpillUnitChange: (unit: string) => void // 오일펜스 배치 관련 boomLines: BoomLine[] onBoomLinesChange: (lines: BoomLine[]) => void @@ -38,6 +46,8 @@ export interface LeftPanelProps { onLayerOpacityChange: (val: number) => void layerBrightness: number onLayerBrightnessChange: (val: number) => void + // 이미지 분석 결과 콜백 + onImageAnalysisResult?: (result: ImageAnalyzeResult) => void } export interface ExpandedSections { diff --git a/frontend/src/tabs/prediction/hooks/useSimulationStatus.ts b/frontend/src/tabs/prediction/hooks/useSimulationStatus.ts new file mode 100644 index 0000000..0445057 --- /dev/null +++ b/frontend/src/tabs/prediction/hooks/useSimulationStatus.ts @@ -0,0 +1,16 @@ +import { useQuery } from '@tanstack/react-query'; +import { api } from '@common/services/api'; +import type { SimulationStatusResponse } from '../services/predictionApi'; + +export const useSimulationStatus = (execSn: number | null) => { + return useQuery({ + queryKey: ['simulationStatus', execSn], + queryFn: () => api.get(`/simulation/status/${execSn}`).then(r => r.data), + enabled: execSn !== null, + refetchInterval: (query) => { + const status = query.state.data?.status; + if (status === 'DONE' || status === 'ERROR') return false; + return 3000; + }, + }); +}; diff --git a/frontend/src/tabs/prediction/services/predictionApi.ts b/frontend/src/tabs/prediction/services/predictionApi.ts index fcd0d65..308d24d 100644 --- a/frontend/src/tabs/prediction/services/predictionApi.ts +++ b/frontend/src/tabs/prediction/services/predictionApi.ts @@ -115,3 +115,105 @@ export const createBacktrack = async (input: { const response = await api.post<{ backtrackSn: number }>('/prediction/backtrack', input); return response.data; }; + +// ============================================================ +// 확산 예측 시뮬레이션 (OpenDrift 연동) +// ============================================================ + +export interface SimulationRunResponse { + success: boolean; + execSn: number; + acdntSn: number | null; + status: 'RUNNING'; +} + +export interface WindPoint { + lat: number; + lon: number; + wind_speed: number; + wind_direction: number; +} + +export interface HydrGrid { + lonInterval: 
number[]; + boundLonLat: { top: number; bottom: number; left: number; right: number }; + rows: number; + cols: number; + latInterval: number[]; +} + +export interface HydrDataStep { + value: [number[][], number[][]]; // [u_2d, v_2d] + grid: HydrGrid; +} + +export interface CenterPoint { + lat: number; + lon: number; + time: number; +} + +export interface OilParticle { + lat: number; + lon: number; + time: number; + particle?: number; + stranded?: 0 | 1; +} + +export interface SimulationSummary { + remainingVolume: number; + weatheredVolume: number; + pollutionArea: number; + beachedVolume: number; + pollutionCoastLength: number; +} + +export interface SimulationStatusResponse { + status: 'PENDING' | 'RUNNING' | 'DONE' | 'ERROR'; + progress?: number; + trajectory?: OilParticle[]; + summary?: SimulationSummary; + centerPoints?: CenterPoint[]; + windData?: WindPoint[][]; + hydrData?: (HydrDataStep | null)[]; + error?: string; +} + +export interface TrajectoryResponse { + trajectory: OilParticle[] | null; + summary: SimulationSummary | null; + centerPoints?: CenterPoint[]; + windData?: WindPoint[][]; + hydrData?: (HydrDataStep | null)[]; +} + +export const fetchAnalysisTrajectory = async (acdntSn: number): Promise => { + const response = await api.get(`/prediction/analyses/${acdntSn}/trajectory`); + return response.data; +}; + +// ============================================================ +// 이미지 업로드 분석 +// ============================================================ + +export interface ImageAnalyzeResult { + acdntSn: number; + lat: number; + lon: number; + oilType: string; + area: number; + volume: number; + fileId: string; + occurredAt: string; +} + +export const analyzeImage = async (file: File): Promise => { + const formData = new FormData(); + formData.append('image', file); + const response = await api.post('/prediction/image-analyze', formData, { + headers: { 'Content-Type': 'multipart/form-data' }, + timeout: 330_000, + }); + return response.data; +}; diff --git a/frontend/src/tabs/reports/components/ReportGenerator.tsx b/frontend/src/tabs/reports/components/ReportGenerator.tsx index bb9cd2a..9e780fd 100644 --- a/frontend/src/tabs/reports/components/ReportGenerator.tsx +++ b/frontend/src/tabs/reports/components/ReportGenerator.tsx @@ -2,7 +2,7 @@ import { useState, useEffect } from 'react'; import { createEmptyReport, } from './OilSpillReportTemplate'; -import { consumeReportGenCategory, consumeHnsReportPayload, type HnsReportPayload } from '@common/hooks/useSubMenu'; +import { consumeReportGenCategory, consumeHnsReportPayload, type HnsReportPayload, consumeOilReportPayload, type OilReportPayload } from '@common/hooks/useSubMenu'; import { saveReport } from '../services/reportsApi'; import { CATEGORIES, @@ -32,6 +32,8 @@ function ReportGenerator({ onSave }: ReportGeneratorProps) { // HNS 실 데이터 (없으면 sampleHnsData fallback) const [hnsPayload, setHnsPayload] = useState(null) + // OIL 실 데이터 (없으면 sampleOilData fallback) + const [oilPayload, setOilPayload] = useState(null) // 외부에서 카테고리 힌트가 변경되면 반영 useEffect(() => { @@ -44,6 +46,9 @@ function ReportGenerator({ onSave }: ReportGeneratorProps) { // HNS 데이터 소비 const payload = consumeHnsReportPayload() if (payload) setHnsPayload(payload) + // OIL 예측 데이터 소비 + const oilData = consumeOilReportPayload() + if (oilData) setOilPayload(oilData) }, []) const cat = CATEGORIES[activeCat] @@ -65,8 +70,19 @@ function ReportGenerator({ onSave }: ReportGeneratorProps) { report.status = '완료' report.author = '시스템 자동생성' if (activeCat === 0) { - 
report.incident.pollutant = sampleOilData.pollution.oilType - report.incident.spillAmount = sampleOilData.pollution.spillAmount + if (oilPayload) { + report.incident.name = oilPayload.incident.name; + report.incident.occurTime = oilPayload.incident.occurTime; + report.incident.location = oilPayload.incident.location; + report.incident.lat = String(oilPayload.incident.lat ?? ''); + report.incident.lon = String(oilPayload.incident.lon ?? ''); + report.incident.shipName = oilPayload.incident.shipName; + report.incident.pollutant = oilPayload.pollution.oilType; + report.incident.spillAmount = oilPayload.pollution.spillAmount; + } else { + report.incident.pollutant = sampleOilData.pollution.oilType; + report.incident.spillAmount = sampleOilData.pollution.spillAmount; + } } try { await saveReport(report) @@ -82,6 +98,24 @@ function ReportGenerator({ onSave }: ReportGeneratorProps) { const sectionHTML = activeSections.map(sec => { let content = `

${sec.desc}

`; + // OIL 섹션에 실 데이터 삽입 + if (activeCat === 0 && oilPayload) { + if (sec.id === 'oil-pollution') { + const rows = [ + ['유출량', oilPayload.pollution.spillAmount, '풍화량', oilPayload.pollution.weathered], + ['해상잔유량', oilPayload.pollution.seaRemain, '오염해역면적', oilPayload.pollution.pollutionArea], + ['연안부착량', oilPayload.pollution.coastAttach, '오염해안길이', oilPayload.pollution.coastLength], + ]; + const simBanner = !oilPayload.hasSimulation + ? '

시뮬레이션이 실행되지 않아 오염량은 입력값 기준으로 표시됩니다.

' + : ''; + const trs = rows.map(r => + `${r[0]}${r[1]}${r[2]}${r[3]}` + ).join(''); + content = `${simBanner}${trs}
`; + } + } + // HNS 섹션에 실 데이터 삽입 if (activeCat === 1 && hnsPayload) { if (sec.id === 'hns-atm') { @@ -261,9 +295,9 @@ function ReportGenerator({ onSave }: ReportGeneratorProps) {
{[ - { label: 'KOSPS', value: sampleOilData.spread.kosps, color: '#06b6d4' }, - { label: 'OpenDrift', value: sampleOilData.spread.openDrift, color: '#ef4444' }, - { label: 'POSEIDON', value: sampleOilData.spread.poseidon, color: '#f97316' }, + { label: 'KOSPS', value: oilPayload?.spread.kosps || sampleOilData.spread.kosps, color: '#06b6d4' }, + { label: 'OpenDrift', value: oilPayload?.spread.openDrift || sampleOilData.spread.openDrift, color: '#ef4444' }, + { label: 'POSEIDON', value: oilPayload?.spread.poseidon || sampleOilData.spread.poseidon, color: '#f97316' }, ].map((m, i) => (

{m.label}

@@ -274,23 +308,30 @@ function ReportGenerator({ onSave }: ReportGeneratorProps) { )} {sec.id === 'oil-pollution' && ( - - - - {[ - ['유출량', sampleOilData.pollution.spillAmount, '풍화량', sampleOilData.pollution.weathered], - ['해상잔유량', sampleOilData.pollution.seaRemain, '오염해역면적', sampleOilData.pollution.pollutionArea], - ['연안부착량', sampleOilData.pollution.coastAttach, '오염해안길이', sampleOilData.pollution.coastLength], - ].map((row, i) => ( - - - - - - - ))} - -
{row[0]}{row[1]}{row[2]}{row[3]}
+ <> + {oilPayload && !oilPayload.hasSimulation && ( +
+ 시뮬레이션이 실행되지 않아 오염량은 입력값 기준으로 표시됩니다. +
+ )} + + + + {[ + ['유출량', oilPayload?.pollution.spillAmount || sampleOilData.pollution.spillAmount, '풍화량', oilPayload?.pollution.weathered || sampleOilData.pollution.weathered], + ['해상잔유량', oilPayload?.pollution.seaRemain || sampleOilData.pollution.seaRemain, '오염해역면적', oilPayload?.pollution.pollutionArea || sampleOilData.pollution.pollutionArea], + ['연안부착량', oilPayload?.pollution.coastAttach || sampleOilData.pollution.coastAttach, '오염해안길이', oilPayload?.pollution.coastLength || sampleOilData.pollution.coastLength], + ].map((row, i) => ( + + + + + + + ))} + +
{row[0]}{row[1]}{row[2]}{row[3]}
+ )} {sec.id === 'oil-sensitive' && ( <> @@ -304,9 +345,9 @@ function ReportGenerator({ onSave }: ReportGeneratorProps) { )} {sec.id === 'oil-coastal' && (

- 최초 부착시간: {sampleOilData.coastal.firstTime} + 최초 부착시간: {oilPayload?.coastal?.firstTime ?? sampleOilData.coastal.firstTime} {' / '} - 부착 해안길이: {sampleOilData.coastal.coastLength} + 부착 해안길이: {oilPayload?.pollution.coastLength || sampleOilData.coastal.coastLength}

)} {sec.id === 'oil-defense' && ( @@ -318,11 +359,20 @@ function ReportGenerator({ onSave }: ReportGeneratorProps) {
)} {sec.id === 'oil-tide' && ( -

- 고조: {sampleOilData.tide.highTide1} - {' / '}저조: {sampleOilData.tide.lowTide} - {' / '}고조: {sampleOilData.tide.highTide2} -

+ <> +

+ 고조: {sampleOilData.tide.highTide1} + {' / '}저조: {sampleOilData.tide.lowTide} + {' / '}고조: {sampleOilData.tide.highTide2} +

+ {oilPayload?.weather && ( +

+ 기상: 풍향/풍속 {oilPayload.weather.windDir} + {' / '}파고 {oilPayload.weather.waveHeight} + {' / '}기온 {oilPayload.weather.temp} +

+ )} + )} {/* ── HNS 대기확산 섹션들 ── */} diff --git a/prediction/image/.dockerignore b/prediction/image/.dockerignore new file mode 100644 index 0000000..3c3272b --- /dev/null +++ b/prediction/image/.dockerignore @@ -0,0 +1,13 @@ +__pycache__/ +stitch/ + +mx15hdi/Detect/Mask_result/ +mx15hdi/Detect/result/ + +mx15hdi/Georeference/Mask_Tif/ +mx15hdi/Georeference/Tif/ + +mx15hdi/Metadata/CSV/ +mx15hdi/Metadata/Image/Original_Images/ + +mx15hdi/Polygon/Shp/ \ No newline at end of file diff --git a/prediction/image/DOCKER_USAGE.md b/prediction/image/DOCKER_USAGE.md new file mode 100644 index 0000000..7f9d6d3 --- /dev/null +++ b/prediction/image/DOCKER_USAGE.md @@ -0,0 +1,376 @@ +# wing-image-analysis Docker 사용 가이드 + +드론 영상 기반 유류 오염 분석 FastAPI 서버를 Docker 컨테이너로 빌드하고 실행하는 방법을 설명한다. + +--- + +## 목차 + +1. [사전 요구사항](#1-사전-요구사항) +2. [빠른 시작](#2-빠른-시작) +3. [빌드 명령어](#3-빌드-명령어) +4. [실행 명령어](#4-실행-명령어) +5. [환경변수 설정](#5-환경변수-설정) +6. [볼륨 구조](#6-볼륨-구조) +7. [API 엔드포인트 사용 예시](#7-api-엔드포인트-사용-예시) +8. [로그 확인 및 디버깅](#8-로그-확인-및-디버깅) +9. [컨테이너 관리](#9-컨테이너-관리) +10. [주의사항](#10-주의사항) +11. [CPU 전용 환경 실행](#11-cpu-전용-환경-실행) + +--- + +## 1. 사전 요구사항 + +| 항목 | 최소 버전 | 확인 명령어 | +|------|----------|-------------| +| Docker Engine | 24.0 이상 | `docker --version` | +| Docker Compose | 2.20 이상 | `docker compose version` | +| NVIDIA 드라이버 | 525 이상 (CUDA 12.1 지원) | `nvidia-smi` | +| nvidia-container-toolkit | 최신 | `nvidia-ctk --version` | + +### nvidia-container-toolkit 설치 (Ubuntu 기준) + +```bash +# GPG 키 및 저장소 추가 +curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \ + | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg + +curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \ + | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \ + | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list + +sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit + +# Docker 런타임 설정 및 재시작 +sudo nvidia-ctk runtime configure --runtime=docker +sudo systemctl restart docker +``` + +### GPU 동작 확인 + +```bash +docker run --rm --gpus all nvidia/cuda:12.1-base-ubuntu22.04 nvidia-smi +``` + +--- + +## 2. 빠른 시작 + +```bash +# 1. prediction/image/ 디렉토리로 이동 +cd prediction/image + +# 2. 환경변수 파일 준비 (필요 시) +cp .env.example .env + +# 3. 빌드 + 실행 (백그라운드) +docker compose up -d --build + +# 4. 서버 상태 확인 +curl http://localhost:5001/docs +``` + +--- + +## 3. 빌드 명령어 + +### docker compose (권장) + +```bash +# 이미지 빌드만 수행 (실행 안 함) +docker compose build + +# 빌드 로그를 상세하게 출력 +docker compose build --progress=plain + +# 캐시 없이 처음부터 빌드 (의존성 변경 시) +docker compose build --no-cache +``` + +### docker build (단독) + +```bash +# prediction/image/ 디렉토리에서 실행 +docker build -t wing-image-analysis:latest . + +# 빌드 태그 지정 +docker build -t wing-image-analysis:1.0.0 . + +# 캐시 없이 빌드 +docker build --no-cache -t wing-image-analysis:latest . +``` + +> **참고**: 첫 빌드는 PyTorch base 이미지(약 8GB) + GDAL/Python 패키지 설치로 **30~60분** 소요될 수 있다. +> 이후 빌드는 레이어 캐시로 수 분 내 완료된다. + +--- + +## 4. 
실행 명령어
+
+### docker compose (권장)
+
+```bash
+# 백그라운드 실행
+docker compose up -d
+
+# 빌드 후 즉시 실행
+docker compose up -d --build
+
+# 포그라운드 실행 (로그 바로 출력)
+docker compose up
+
+# 중지
+docker compose down
+
+# 중지 + 볼륨 삭제 (-v는 명명된 볼륨만 제거하며, 바인드 마운트 데이터는 유지된다)
+docker compose down -v
+```
+
+### docker run (단독 — 테스트용)
+
+```bash
+docker run --rm \
+  --gpus all \
+  -p 5001:5001 \
+  --env-file .env \
+  -v "$(pwd)/mx15hdi/Metadata/Image/Original_Images:/app/mx15hdi/Metadata/Image/Original_Images" \
+  wing-image-analysis:latest
+```
+
+---
+
+## 5. 환경변수 설정
+
+`.env.example`을 복사하여 `.env`를 생성한다.
+
+```bash
+cp .env.example .env
+```
+
+| 변수 | 설명 | 기본값 |
+|------|------|--------|
+| `API_HOST` | 서버 바인드 주소 | `0.0.0.0` |
+| `API_PORT` | 서버 포트 | `5001` |
+
+---
+
+## 6. 볼륨 구조
+
+컨테이너 내부 경로와 호스트 경로의 매핑이다. 이미지/결과 데이터는 컨테이너 외부에 저장되어 컨테이너를 재시작해도 유지된다.
+
+```
+호스트 (prediction/image/)                    컨테이너 (/app/)
+─────────────────────────────────────────────────────────────────────
+mx15hdi/Metadata/Image/Original_Images/    → mx15hdi/Metadata/Image/Original_Images/    ← 원본 이미지 입력
+mx15hdi/Metadata/CSV/                      → mx15hdi/Metadata/CSV/                      ← 메타데이터 출력
+mx15hdi/Georeference/Tif/                  → mx15hdi/Georeference/Tif/                  ← GeoTIFF 출력
+mx15hdi/Georeference/Mask_Tif/             → mx15hdi/Georeference/Mask_Tif/             ← 마스크 GeoTIFF
+mx15hdi/Polygon/Shp/                       → mx15hdi/Polygon/Shp/                       ← Shapefile 출력
+mx15hdi/Detect/result/                     → mx15hdi/Detect/result/                     ← 블렌딩 결과
+mx15hdi/Detect/Mask_result/                → mx15hdi/Detect/Mask_result/                ← 마스크 결과
+starsafire/Metadata/Image/Original_Images  → starsafire/Metadata/Image/Original_Images  ← 열화상 입력
+starsafire/{기타}/                          → starsafire/{기타}/                          ← 열화상 출력
+stitch/                                    → stitch/                                    ← 스티칭 결과
+```
+
+---
+
+## 7. API 엔드포인트 사용 예시
+
+서버 기동 후 `http://localhost:5001/docs`에서 Swagger UI로 전체 API를 확인할 수 있다.
+
+### 7.1 전체 분석 파이프라인 실행
+
+```bash
+curl -X POST http://localhost:5001/run-script/ \
+  -F "image=@/path/to/drone_image.jpg" \
+  -F "camTy=mx15hdi" \
+  -F "fileId=20240310_001"
+```
+
+**응답 예시**:
+```json
+{
+  "meta": "drone_image.jpg,37,30,0,126,55,0,...",
+  "data": [
+    {
+      "classId": 2,
+      "area": 1234.56,
+      "volume": 0.1234,
+      "note": "갈색",
+      "thickness": 0.0001,
+      "wkt": "POLYGON((...))"
+    }
+  ]
+}
+```
+
+### 7.2 메타데이터 조회
+
+```bash
+curl http://localhost:5001/get-metadata/mx15hdi/20240310_001
+```
+
+### 7.3 원본 이미지 조회 (Base64)
+
+```bash
+curl http://localhost:5001/get-original-image/mx15hdi/20240310_001
+```
+
+### 7.4 GeoTIFF + 좌표 조회
+
+```bash
+curl http://localhost:5001/get-image/mx15hdi/20240310_001
+```
+
+### 7.5 이미지 스티칭
+
+```bash
+curl -X POST http://localhost:5001/stitch \
+  -F "files=@photo1.jpg" \
+  -F "files=@photo2.jpg" \
+  -F "fileId=20240310_001"
+```
+
+---
+
+## 8. 로그 확인 및 디버깅
+
+```bash
+# 실시간 로그 출력
+docker logs wing-image-analysis -f
+
+# 최근 100줄만 출력
+docker logs wing-image-analysis --tail 100
+
+# 컨테이너 내부 쉘 접속
+docker exec -it wing-image-analysis bash
+
+# GPU 사용 현황 확인 (컨테이너 내부)
+docker exec wing-image-analysis nvidia-smi
+
+# Python 패키지 목록 확인
+docker exec wing-image-analysis pip list
+```
+
+---
+
+## 9. 컨테이너 관리
+
+```bash
+# 상태 확인
+docker compose ps
+
+# 재시작
+docker compose restart
+
+# 중지 (볼륨 유지)
+docker compose down
+
+# 이미지 삭제
+docker rmi wing-image-analysis:latest
+
+# 사용하지 않는 리소스 정리
+docker system prune -f
+```
+
+---
+
+## 10. 주의사항
+
+### GPU 자동 감지
+- 서버 기동 시 `torch.cuda.is_available()`로 GPU 유무를 자동 감지한다.
+- GPU가 있으면 `cuda:0`, 없으면 `cpu`로 자동 폴백된다.
+- 환경변수 `DEVICE`로 device를 명시 지정할 수 있다 (예: `DEVICE=cpu`, `DEVICE=cuda:1`).
+
+### 첫 기동 시간
+- AI 모델 로드: 약 **10~30초** 소요 (GPU 메모리에 로딩)
+- 준비 완료 후 로그에 `Application startup complete` 메시지가 출력된다.
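+
+The readiness message only appears in the logs, so automation that uploads images immediately after `docker compose up` can race the model load. Below is a minimal readiness-probe sketch in Python; the base URL, timeouts, and use of `requests` are illustrative assumptions, not project settings:
+
+```python
+import time
+
+import requests  # assumed HTTP client; any equivalent works
+
+BASE_URL = "http://localhost:5001"  # adjust to the host port mapping
+
+
+def wait_until_ready(timeout_s: int = 120, interval_s: float = 2.0) -> bool:
+    """Poll /docs until uvicorn starts serving requests.
+
+    The model is loaded in the FastAPI lifespan hook and uvicorn only
+    begins accepting connections after startup completes, so a 200
+    response here implies the model is already in memory.
+    """
+    deadline = time.monotonic() + timeout_s
+    while time.monotonic() < deadline:
+        try:
+            if requests.get(f"{BASE_URL}/docs", timeout=3).status_code == 200:
+                return True
+        except requests.RequestException:
+            pass  # server not listening yet — retry after a short pause
+        time.sleep(interval_s)
+    return False
+
+
+if __name__ == "__main__":
+    print("ready" if wait_until_ready() else "timed out")
+```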
+ +### workers=1 고정 +- GPU 모델은 프로세스 간 공유가 불가하므로 uvicorn workers는 반드시 `1`로 유지해야 한다. +- 병렬 처리는 내부 `ThreadPoolExecutor`(max_workers=4)로 처리된다. + +### 포트 충돌 +- 기본 포트 `5001`이 다른 서비스와 충돌하면 `docker-compose.yml`의 `ports` 항목을 수정한다: + ```yaml + ports: + - "5002:5001" # 호스트 5002 → 컨테이너 5001 + ``` + +--- + +## 11. CPU 전용 환경 실행 + +GPU(NVIDIA)가 없는 환경에서는 CPU 전용 설정을 사용한다. + +### 사전 요구사항 (CPU 모드) + +| 항목 | 최소 버전 | 확인 명령어 | +|------|----------|-------------| +| Docker Engine | 24.0 이상 | `docker --version` | +| Docker Compose | 2.20 이상 | `docker compose version` | +| NVIDIA 드라이버 | **불필요** | — | + +### 빠른 시작 (CPU) + +```bash +# prediction/image/ 디렉토리로 이동 +cd prediction/image + +# 환경변수 파일 준비 (필요 시) +cp .env.example .env + +# CPU 이미지 빌드 + 실행 +docker compose -f docker-compose.cpu.yml up -d --build + +# 서버 상태 확인 +curl http://localhost:5001/docs +``` + +### 빌드 명령어 (CPU) + +```bash +# CPU 이미지만 빌드 +docker compose -f docker-compose.cpu.yml build + +# 캐시 없이 빌드 +docker compose -f docker-compose.cpu.yml build --no-cache +``` + +> **참고**: CPU 기반 PyTorch 이미지는 GPU 이미지(~8GB) 대비 약 70% 용량이 절감된다. +> 단, CPU 추론은 GPU 대비 처리 속도가 느리므로 대용량 이미지 분석 시 시간이 더 소요된다. + +### 실행 명령어 (CPU) + +```bash +# 백그라운드 실행 +docker compose -f docker-compose.cpu.yml up -d + +# 포그라운드 실행 (로그 바로 출력) +docker compose -f docker-compose.cpu.yml up + +# 중지 +docker compose -f docker-compose.cpu.yml down +``` + +### 로컬 직접 실행 (Docker 없이) + +```bash +# GPU 있으면 자동으로 cuda:0 사용, 없으면 cpu로 폴백 +python api.py + +# device 강제 지정 +DEVICE=cpu python api.py +DEVICE=cuda:1 python api.py +``` + +### GPU/CPU 모드 확인 + +서버 기동 로그에서 사용 device를 확인할 수 있다: + +``` +[Inference] 사용 device: cpu ← CPU 모드 +[Inference] 사용 device: cuda:0 ← GPU 모드 +``` diff --git a/prediction/image/Dockerfile b/prediction/image/Dockerfile new file mode 100644 index 0000000..6da64a5 --- /dev/null +++ b/prediction/image/Dockerfile @@ -0,0 +1,84 @@ +# ============================================================================== +# wing-image-analysis — 드론 영상 유류 분석 FastAPI 서버 +# +# Base: PyTorch 1.9.1 + CUDA 11.1 + cuDNN 8 +# (mmsegmentation 0.25.0 / mmcv-full 1.4.3 호환 환경) +# GPU: NVIDIA GPU 필수 (MMSegmentation 추론) +# Port: 5001 +# ============================================================================== +FROM pytorch/pytorch:1.9.1-cuda11.1-cudnn8-devel + +ENV DEBIAN_FRONTEND=noninteractive \ + PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +WORKDIR /app + +# ------------------------------------------------------------------------------ +# 시스템 패키지: GDAL / PROJ / GEOS (rasterio, geopandas 빌드 의존성) +# libpq-dev: psycopg2-binary 런타임 의존성 +# libspatialindex-dev: geopandas 공간 인덱스 +# ------------------------------------------------------------------------------ +RUN apt-get update && apt-get install -y --no-install-recommends \ + gdal-bin \ + libgdal-dev \ + libproj-dev \ + libgeos-dev \ + libspatialindex-dev \ + gcc \ + g++ \ + git \ + && rm -rf /var/lib/apt/lists/* + +# rasterio는 GDAL 헤더 버전을 맞춰 빌드해야 한다 +ENV GDAL_VERSION=3.4.1 + +# ------------------------------------------------------------------------------ +# mmcv-full 1.4.3 — CUDA 11.1 + PyTorch 1.9.0 pre-built 휠 +# (소스 컴파일 없이 수 초 내 설치) +# ------------------------------------------------------------------------------ +RUN pip install --no-cache-dir \ + mmcv-full==1.4.3 \ + -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html + +# ------------------------------------------------------------------------------ +# Python 의존성 설치 +# ------------------------------------------------------------------------------ +COPY requirements.txt . 
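+# NOTE: requirements.txt is copied on its own layer (before `COPY . .` below)
+# so the pip install that follows stays cached when only application source changes.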
+RUN pip install --no-cache-dir -r requirements.txt + +# ------------------------------------------------------------------------------ +# 로컬 mmsegmentation 설치 (mx15hdi/Detect/mmsegmentation/) +# 번들 소스를 먼저 복사한 뒤 editable 설치한다 +# ------------------------------------------------------------------------------ +COPY mx15hdi/Detect/mmsegmentation/ /tmp/mmsegmentation/ +RUN pip install --no-cache-dir -e /tmp/mmsegmentation/ + +# ------------------------------------------------------------------------------ +# 소스 코드 전체 복사 +# 대용량 데이터 디렉토리(Original_Images, result 등)는 +# docker-compose.yml의 볼륨 마운트로 외부에서 주입된다 +# ------------------------------------------------------------------------------ +COPY . . + +# ------------------------------------------------------------------------------ +# .dockerignore로 제외된 런타임 출력 디렉토리를 빈 폴더로 생성 +# (볼륨 마운트 전에도 경로가 존재해야 한다) +# ------------------------------------------------------------------------------ +RUN mkdir -p \ + /app/stitch \ + /app/mx15hdi/Detect/Mask_result \ + /app/mx15hdi/Detect/result \ + /app/mx15hdi/Georeference/Mask_Tif \ + /app/mx15hdi/Georeference/Tif \ + /app/mx15hdi/Metadata/CSV \ + /app/mx15hdi/Metadata/Image/Original_Images \ + /app/mx15hdi/Polygon/Shp + +# ------------------------------------------------------------------------------ +# 런타임 설정 +# ------------------------------------------------------------------------------ +EXPOSE 5001 + +# workers=1: GPU 모델을 프로세스 하나에서만 로드 (메모리 공유 불가) +CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "5001", "--workers", "1"] diff --git a/prediction/image/Dockerfile.cpu b/prediction/image/Dockerfile.cpu new file mode 100644 index 0000000..dacdec2 --- /dev/null +++ b/prediction/image/Dockerfile.cpu @@ -0,0 +1,112 @@ +# ============================================================================== +# wing-image-analysis — 드론 영상 유류 분석 FastAPI 서버 (CPU 전용) +# +# Base: python:3.9-slim + PyTorch 1.9.0 CPU 빌드 +# (mmsegmentation 0.25.0 / mmcv-full 1.4.3 호환 환경) +# python:3.9 필수 — numpy 1.26.4, geopandas 0.14.4가 Python >=3.9 요구 +# GPU: 불필요 (CPU 추론) +# Port: 5001 +# ============================================================================== +FROM python:3.9-slim + +ENV DEBIAN_FRONTEND=noninteractive \ + PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + DEVICE=cpu + +WORKDIR /app + +# ------------------------------------------------------------------------------ +# 시스템 패키지: GDAL / PROJ / GEOS (rasterio, geopandas 빌드 의존성) +# libspatialindex-dev: geopandas 공간 인덱스 +# opencv-contrib-python-headless 런타임 SO 의존성 (python:3.9-slim에 미포함): +# libgl1 — libGL.so.1 +# libglib2.0-0 — libgthread-2.0.so.0, libgobject-2.0.so.0, libglib-2.0.so.0 +# libsm6 — libSM.so.6 +# libxext6 — libXext.so.6 +# libxrender1 — libXrender.so.1 +# libgomp1 — libgomp.so.1 (OpenMP, numpy/opencv 병렬 처리) +# ------------------------------------------------------------------------------ +RUN apt-get update && apt-get install -y --no-install-recommends \ + gdal-bin \ + libgdal-dev \ + libproj-dev \ + libgeos-dev \ + libspatialindex-dev \ + libgl1 \ + libglib2.0-0 \ + libsm6 \ + libxext6 \ + libxrender1 \ + libgomp1 \ + gcc \ + g++ \ + git \ + && rm -rf /var/lib/apt/lists/* + +# rasterio는 GDAL 헤더 버전을 맞춰 빌드해야 한다 +ENV GDAL_VERSION=3.4.1 + +# ------------------------------------------------------------------------------ +# GDAL Python 바인딩 (osgeo 모듈) — 시스템 GDAL 버전과 일치해야 한다 +# python:3.9-slim은 conda 없이 pip 환경이므로 명시적 설치 필요 +# ------------------------------------------------------------------------------ +RUN pip install --no-cache-dir GDAL=="$(gdal-config 
--version)" + +# ------------------------------------------------------------------------------ +# PyTorch 1.9.0 CPU 버전 설치 +# (mmsegmentation 0.25.0 / mmcv-full 1.4.3 호환) +# ------------------------------------------------------------------------------ +RUN pip install --no-cache-dir \ + torch==1.9.0+cpu \ + torchvision==0.10.0+cpu \ + -f https://download.pytorch.org/whl/torch_stable.html + +# ------------------------------------------------------------------------------ +# mmcv-full 1.4.3 CPU 휠 (CUDA ops 없는 경량 빌드, 추론에 충분) +# ------------------------------------------------------------------------------ +RUN pip install --no-cache-dir \ + mmcv-full==1.4.3 \ + -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.9.0/index.html + +# ------------------------------------------------------------------------------ +# Python 의존성 설치 +# ------------------------------------------------------------------------------ +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# ------------------------------------------------------------------------------ +# 로컬 mmsegmentation 설치 (mx15hdi/Detect/mmsegmentation/) +# 번들 소스를 먼저 복사한 뒤 editable 설치한다 +# ------------------------------------------------------------------------------ +COPY mx15hdi/Detect/mmsegmentation/ /tmp/mmsegmentation/ +RUN pip install --no-cache-dir -e /tmp/mmsegmentation/ + +# ------------------------------------------------------------------------------ +# 소스 코드 전체 복사 +# 대용량 데이터 디렉토리(Original_Images, result 등)는 +# docker-compose.cpu.yml의 볼륨 마운트로 외부에서 주입된다 +# ------------------------------------------------------------------------------ +COPY . . + +# ------------------------------------------------------------------------------ +# .dockerignore로 제외된 런타임 출력 디렉토리를 빈 폴더로 생성 +# (볼륨 마운트 전에도 경로가 존재해야 한다) +# ------------------------------------------------------------------------------ +RUN mkdir -p \ + /app/stitch \ + /app/mx15hdi/Detect/Mask_result \ + /app/mx15hdi/Detect/result \ + /app/mx15hdi/Georeference/Mask_Tif \ + /app/mx15hdi/Georeference/Tif \ + /app/mx15hdi/Metadata/CSV \ + /app/mx15hdi/Metadata/Image/Original_Images \ + /app/mx15hdi/Polygon/Shp + +# ------------------------------------------------------------------------------ +# 런타임 설정 +# ------------------------------------------------------------------------------ +EXPOSE 5001 + +# workers=1: 모델을 프로세스 하나에서만 로드 (메모리 공유 불가) +CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "5001", "--workers", "1"] diff --git a/prediction/image/api.py b/prediction/image/api.py new file mode 100644 index 0000000..1a927e4 --- /dev/null +++ b/prediction/image/api.py @@ -0,0 +1,340 @@ +import sys +import os +from pathlib import Path +from contextlib import asynccontextmanager +import asyncio +from concurrent.futures import ThreadPoolExecutor + +from fastapi import FastAPI, HTTPException, File, UploadFile, Form +from fastapi.responses import Response, FileResponse +import subprocess +import rasterio +import numpy as np +from PIL import Image +from PIL.ExifTags import TAGS +import io +import base64 +from pyproj import Transformer +from extract_data import get_metadata as get_meta +from extract_data import get_oil_type as get_oil +import time + +from typing import List, Optional +import shutil +from datetime import datetime +from collections import Counter + +# mx15hdi 파이프라인 모듈 임포트를 위한 sys.path 설정 +_BASE_DIR = Path(__file__).parent +sys.path.insert(0, str(_BASE_DIR / 'mx15hdi' / 'Detect')) +sys.path.insert(0, str(_BASE_DIR / 'mx15hdi' / 'Metadata' / 'Scripts')) 
+sys.path.insert(0, str(_BASE_DIR / 'mx15hdi' / 'Georeference' / 'Scripts')) +sys.path.insert(0, str(_BASE_DIR / 'mx15hdi' / 'Polygon' / 'Scripts')) + +from Inference import load_model, run_inference +from Export_Metadata_mx15hdi import run_metadata_export +from Create_Georeferenced_Images_nadir import run_georeference +from Oilshape import run_oilshape + +# AI 모델 (서버 시작 시 1회 로드) +_model = None +# CPU/GPU 바운드 작업용 스레드 풀 +_executor = ThreadPoolExecutor(max_workers=4) + + +@asynccontextmanager +async def lifespan(app: FastAPI): + """서버 시작 시 AI 모델을 1회 로드하고, 종료 시 해제한다.""" + global _model + print("AI 모델 로딩 중 (epoch_165.pth)...") + _model = load_model() + print("AI 모델 로드 완료") + yield + _model = None + + +app = FastAPI(lifespan=lifespan) + + +def check_gps_info(image_path: str): + # Pillow로 이미지 열기 + image = Image.open(image_path) + + # EXIF 데이터 추출 + exifdata = image.getexif() + + if not exifdata: + print("EXIF 정보를 찾을 수 없습니다.") + return False + + # GPS 정보 추출 + gps_ifd = exifdata.get_ifd(0x8825) # GPS IFD 태그 + if not gps_ifd: + print("GPS 정보를 찾을 수 없습니다.") + return False + + return True + + +def check_camera_info(image_file): + # Pillow로 이미지 열기 + image = Image.open(image_file) + + # EXIF 데이터 추출 + exifdata = image.getexif() + + if not exifdata: + print("EXIF 정보를 찾을 수 없습니다.") + return False + + for tag_id, value in exifdata.items(): + tag_name = TAGS.get(tag_id, tag_id) + if tag_name == "Model": + return value.strip() if isinstance(value, str) else value + + +async def _run_mx15hdi_pipeline(file_id: str): + """ + mx15hdi 파이프라인을 in-process로 실행한다. + - Step 1 (AI 추론) + Step 2 (메타데이터 추출) 병렬 실행 + - Step 3 (지리참조) → Step 4 (폴리곤 추출) 순차 실행 + - 중간 파일 I/O 없이 numpy 배열을 메모리로 전달 + """ + loop = asyncio.get_event_loop() + + # Step 1 + Step 2 병렬 실행 — inference_cache 캡처 + inference_cache, _ = await asyncio.gather( + loop.run_in_executor(_executor, run_inference, _model, file_id), + loop.run_in_executor(_executor, run_metadata_export, file_id), + ) + + # Step 3: Georeference — inference_cache 메모리로 전달, georef_cache 반환 + georef_cache = await loop.run_in_executor( + _executor, run_georeference, file_id, inference_cache + ) + + # Step 4: Polygon 추출 — georef_cache 메모리로 전달 (Mask_Tif 디스크 읽기 없음) + await loop.run_in_executor(_executor, run_oilshape, file_id, georef_cache) + + +# 전체 과정을 구동하는 api +@app.post("/run-script/") +async def run_script( + # pollId: int = Form(...), + camTy: str = Form(...), + fileId: str = Form(...), + image: UploadFile = File(...) 
+): + try: + print("start") + start_time = time.perf_counter() + + if camTy not in ["mx15hdi", "starsafire"]: + raise HTTPException(status_code=400, detail="string1 must be 'mx15hdi' or 'starsafire'") + + # 저장할 이미지 경로 설정 + upload_dir = os.path.join(camTy, "Metadata/Image/Original_Images", fileId) + os.makedirs(upload_dir, exist_ok=True) + + # 이미지 파일 저장 + image_path = os.path.join(upload_dir, image.filename) + with open(image_path, "wb") as f: + f.write(await image.read()) + + gps_flage = check_gps_info(image_path) + if not gps_flage: + return {"detail": "GPS Infomation Not Found"} + + if camTy == "mx15hdi": + # in-process 파이프라인 실행 (모델 재로딩 없음, Step1+2 병렬) + await _run_mx15hdi_pipeline(fileId) + else: + # starsafire: 기존 subprocess 방식 유지 + script_dir = os.path.join(os.getcwd(), camTy, "Main") + script_file = "Combine_module.py" + script_path = os.path.join(script_dir, script_file) + + if not os.path.exists(script_path): + raise HTTPException(status_code=404, detail="Script not found") + + result = subprocess.run( + ["python", script_file, fileId], + cwd=script_dir, + capture_output=True, + text=True, + timeout=300 + ) + print(f"Subprocess stdout: {result.stdout}") + print(f"Subprocess stderr: {result.stderr}") + + meta_string = get_meta(camTy, fileId) + oil_data = get_oil(camTy, fileId) + end_time = time.perf_counter() + print(f"Run time: {end_time - start_time:.4f} sec") + + return { + "meta": meta_string, + "data": oil_data + } + + except subprocess.TimeoutExpired: + raise HTTPException(status_code=500, detail="Script execution timed out") + except Exception as e: + import traceback + traceback.print_exc() + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/get-metadata/{camTy}/{fileId}") +async def get_metadata(camTy: str, fileId: str): + try: + meta_string = get_meta(camTy, fileId) + oil_data = get_oil(camTy, fileId) + + return { + "meta": meta_string, + "data": oil_data + } + + except subprocess.TimeoutExpired: + raise HTTPException(status_code=500, detail="Script execution timed out") + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/get-original-image/{camTy}/{fileId}") +async def get_original_image(camTy: str, fileId: str): + try: + image_path = os.path.join(camTy, "Metadata/Image/Original_Images", fileId) + files = os.listdir(image_path) + target_file = [f for f in files if f.endswith(".png") or f.endswith(".jpg")] + image_file = os.path.join(image_path, target_file[0]) + + with open(image_file, "rb") as origin_image: + base64_string = base64.b64encode(origin_image.read()).decode("utf-8") + print(base64_string[:100]) + + return base64_string + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/get-image/{camTy}/{fileId}") +async def get_image(camTy: str, fileId: str): + try: + tif_file_path = os.path.join(camTy, "Georeference/Tif", fileId) + files = os.listdir(tif_file_path) + target_file = [f for f in files if f.endswith(".tif")] + tif_file = os.path.join(tif_file_path, target_file[0]) + + with rasterio.open(tif_file) as dataset: + crs = dataset.crs + + bounds = dataset.bounds + + if crs != "EPSG:4326": + transformer = Transformer.from_crs(crs, "EPSG:4326", always_xy=True) + minx, miny = transformer.transform(bounds.left, bounds.bottom) + maxx, maxy = transformer.transform(bounds.right, bounds.top) + + print(minx, miny, maxx, maxy) + + data = dataset.read() + if data.shape[0] == 1: + image_data = data[0] + else: + image_data = np.moveaxis(data, 0, -1) + + image = 
+            buffer = io.BytesIO()
+            image.save(buffer, format="PNG")
+
+            base64_string = base64.b64encode(buffer.getvalue()).decode("utf-8")
+
+            print(base64_string[:100])
+            return {
+                "minLon": minx,
+                "minLat": miny,
+                "maxLon": maxx,
+                "maxLat": maxy,
+                "image": base64_string
+            }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+BASE_DIR = Path(__file__).parent
+PIC_GPS_SCRIPT = BASE_DIR / "pic_gps.py"
+
+@app.post("/stitch")
+async def stitch(
+    files: List[UploadFile] = File(..., description="합성할 이미지 파일들 (2장 이상)"),
+    fileId: str = Form(...)
+):
+    if len(files) < 2:
+        raise HTTPException(
+            status_code=400,
+            detail="최소 2장 이상의 이미지가 필요합니다."
+        )
+
+    try:
+        upload_dir = BASE_DIR / "stitch" / fileId
+        upload_dir.mkdir(parents=True, exist_ok=True)
+
+        model_list = []
+        for idx, file in enumerate(files):
+            model = check_camera_info(file.file)
+            model_list.append(model)
+
+            original_filename = file.filename or f"image_{idx}.jpg"
+            filename = f"{model}_{idx:03d}_{original_filename}"
+            file_path = upload_dir / filename
+
+            # EXIF 판독으로 이동한 스트림 위치를 되돌린 뒤 파일을 저장한다
+            file.file.seek(0)
+            with open(file_path, "wb") as buffer:
+                shutil.copyfileobj(file.file, buffer)
+
+        output_filename = f"stitched_{fileId}.jpg"
+        output_path = upload_dir / output_filename
+
+        model_counter = Counter(model_list)
+        most_common_model = model_counter.most_common(1)
+
+        cmd = [
+            "python",
+            str(PIC_GPS_SCRIPT),
+            "--mode", "drone",
+            "--input", str(upload_dir),
+            "--out", str(output_path),
+            "--model", most_common_model[0][0],
+            "--enhance"
+        ]
+
+        print(cmd)
+
+        result = subprocess.run(
+            cmd,
+            capture_output=True,
+            text=True,
+            timeout=300
+        )
+
+        print(f"Subprocess stdout: {result.stdout}")
+        if result.returncode != 0:
+            print(f"Subprocess stderr: {result.stderr}")
+            raise HTTPException(status_code=500, detail=f"Script failed: {result.stderr}")
+
+        return FileResponse(
+            path=str(output_path),
+            media_type="image/jpeg",
+            filename=output_filename
+        )
+    except subprocess.TimeoutExpired:
+        raise HTTPException(status_code=500, detail="Script execution timed out")
+    except HTTPException:
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=5001)
diff --git a/prediction/image/docker-compose.cpu.yml b/prediction/image/docker-compose.cpu.yml
new file mode 100644
index 0000000..4b1e555
--- /dev/null
+++ b/prediction/image/docker-compose.cpu.yml
@@ -0,0 +1,46 @@
+version: "3.9"
+
+# CPU 전용 docker-compose 설정
+# GPU(nvidia-container-toolkit) 없이도 실행 가능
+# 실행: docker compose -f docker-compose.cpu.yml up -d --build
+
+services:
+  image-analysis:
+    build:
+      context: .
+ dockerfile: Dockerfile.cpu + image: wing-image-analysis:cpu + container_name: wing-image-analysis + ports: + - "5001:5001" + environment: + - DEVICE=cpu + + volumes: + # ── mx15hdi (EO 드론 카메라) ──────────────────────────────────────── + # 입력: 업로드된 원본 이미지 + - ./mx15hdi/Metadata/Image/Original_Images:/app/mx15hdi/Metadata/Image/Original_Images + # 출력: 메타데이터 CSV + - ./mx15hdi/Metadata/CSV:/app/mx15hdi/Metadata/CSV + # 출력: 지리참조 GeoTIFF (컬러 / 마스크) + - ./mx15hdi/Georeference/Tif:/app/mx15hdi/Georeference/Tif + - ./mx15hdi/Georeference/Mask_Tif:/app/mx15hdi/Georeference/Mask_Tif + # 출력: 유류 폴리곤 Shapefile + - ./mx15hdi/Polygon/Shp:/app/mx15hdi/Polygon/Shp + # 출력: 블렌딩 추론 결과 / 마스크 이미지 + - ./mx15hdi/Detect/result:/app/mx15hdi/Detect/result + - ./mx15hdi/Detect/Mask_result:/app/mx15hdi/Detect/Mask_result + # ── starsafire (열화상 카메라) ────────────────────────────────────── + - ./starsafire/Metadata/Image/Original_Images:/app/starsafire/Metadata/Image/Original_Images + - ./starsafire/Metadata/CSV:/app/starsafire/Metadata/CSV + - ./starsafire/Georeference/Tif:/app/starsafire/Georeference/Tif + - ./starsafire/Georeference/Mask_Tif:/app/starsafire/Georeference/Mask_Tif + - ./starsafire/Polygon/Shp:/app/starsafire/Polygon/Shp + - ./starsafire/Detect/result:/app/starsafire/Detect/result + - ./starsafire/Detect/Mask_result:/app/starsafire/Detect/Mask_result + # ── 스티칭 결과 ───────────────────────────────────────────────────── + - ./stitch:/app/stitch + + # GPU deploy 섹션 없음 — CPU 전용 실행 + + restart: unless-stopped diff --git a/prediction/image/docker-compose.yml b/prediction/image/docker-compose.yml new file mode 100644 index 0000000..125eacf --- /dev/null +++ b/prediction/image/docker-compose.yml @@ -0,0 +1,47 @@ +version: "3.9" + +services: + image-analysis: + build: + context: . 
+ dockerfile: Dockerfile + image: wing-image-analysis:latest + container_name: wing-image-analysis + ports: + - "5001:5001" + + volumes: + # ── mx15hdi (EO 드론 카메라) ──────────────────────────────────────── + # 입력: 업로드된 원본 이미지 + - ./mx15hdi/Metadata/Image/Original_Images:/app/mx15hdi/Metadata/Image/Original_Images + # 출력: 메타데이터 CSV + - ./mx15hdi/Metadata/CSV:/app/mx15hdi/Metadata/CSV + # 출력: 지리참조 GeoTIFF (컬러 / 마스크) + - ./mx15hdi/Georeference/Tif:/app/mx15hdi/Georeference/Tif + - ./mx15hdi/Georeference/Mask_Tif:/app/mx15hdi/Georeference/Mask_Tif + # 출력: 유류 폴리곤 Shapefile + - ./mx15hdi/Polygon/Shp:/app/mx15hdi/Polygon/Shp + # 출력: 블렌딩 추론 결과 / 마스크 이미지 + - ./mx15hdi/Detect/result:/app/mx15hdi/Detect/result + - ./mx15hdi/Detect/Mask_result:/app/mx15hdi/Detect/Mask_result + # ── starsafire (열화상 카메라) ────────────────────────────────────── + - ./starsafire/Metadata/Image/Original_Images:/app/starsafire/Metadata/Image/Original_Images + - ./starsafire/Metadata/CSV:/app/starsafire/Metadata/CSV + - ./starsafire/Georeference/Tif:/app/starsafire/Georeference/Tif + - ./starsafire/Georeference/Mask_Tif:/app/starsafire/Georeference/Mask_Tif + - ./starsafire/Polygon/Shp:/app/starsafire/Polygon/Shp + - ./starsafire/Detect/result:/app/starsafire/Detect/result + - ./starsafire/Detect/Mask_result:/app/starsafire/Detect/Mask_result + # ── 스티칭 결과 ───────────────────────────────────────────────────── + - ./stitch:/app/stitch + + # NVIDIA GPU 할당 (nvidia-container-toolkit 필수) + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] + + restart: unless-stopped diff --git a/prediction/image/extract_data.py b/prediction/image/extract_data.py new file mode 100644 index 0000000..cc492fe --- /dev/null +++ b/prediction/image/extract_data.py @@ -0,0 +1,97 @@ +import csv +from datetime import datetime +from pathlib import Path +import geopandas as gpd +import json + +def get_metadata(camTy: str, fileId: str): + + # CSV 파일 경로 설정 + # base_dir = "mx15hdi" if pollId == "1" else "starsafire" + if camTy == "mx15hdi": + csv_path = f"{camTy}/Metadata/CSV/{fileId}/mx15hdi_interpolation.csv" + elif camTy == "starsafire": + csv_path = f"{camTy}/Metadata/CSV/{fileId}/Metadata_Extracted.csv" + + try: + # CSV 파일 읽기 + with open(csv_path, 'r', newline='', encoding='utf-8-sig') as csvfile: + reader = csv.reader(csvfile) + next(reader, None) + row = next(reader, None) + return ','.join(row) + + except FileNotFoundError: + print(f"CSV file not found: {csv_path}") + raise + except ValueError as e: + print(f"Value error: {str(e)}") + raise + except Exception as e: + print(f"Error processing CSV: {e}") + raise + + +def get_oil_type(camTy: str, fileId: str): + # Shapefile 경로 설정 + path = f"{camTy}/Polygon/Shp/{fileId}" + shp_file = list(Path(path).glob("*.shp")) + if not shp_file: + return [] + shp_path = f"{camTy}/Polygon/Shp/{fileId}/{shp_file[0].name}" + print(shp_path) + # if camTy == "mx15hdi": + # fileSub = f"{Path(fileName).stem}_gsd" + # elif camTy == "starsafire": + # fileSub = f"{Path(fileName).stem}" + + # shp_path = f"{camTy}/Polygon/Shp/{fileId}/{fileSub}.shp" + + # 두께 정보 + class_thickness_mm = { + 1: 1.0, # Black oil (Emulsion) + 2: 0.1, # Brown oil (Crude) + 3: 0.0003, # Rainbow oil (Slick) + 4: 0.0001 # Silver oil (Slick) + } + # 알고리즘 정보 + algorithm = { + 1: "검정", + 2: "갈색", + 3: "무지개", + 4: "은색" + } + + try: + # Shapefile 읽기 + gdf = gpd.read_file(shp_path) + if gdf.crs != "epsg:4326": + gdf = gdf.to_crs("epsg:4326") + + # 데이터 준비 + data = [] + for _, row in gdf.iterrows(): + class_id = 
row.get('class_id', None) + area_m2 = row.get('area_m2', None) + volume_m3 = row.get('volume_m3', None) + note = row.get('note', None) + thickness_m = class_thickness_mm.get(class_id, 0) / 1000.0 + geom_wkt = row.geometry.wkt if row.geometry else None + result = { + "classId": algorithm.get(class_id, 0), + "area": area_m2, + "volume": volume_m3, + "note": note, + "thickness": thickness_m, + "wkt": geom_wkt + } + data.append(result) + + return data + + except FileNotFoundError: + print(f"Shapefile not found: {shp_path}") + raise + except Exception as e: + print(f"Error processing shapefile or database: {str(e)}") + raise \ No newline at end of file diff --git a/prediction/image/image_plan.md b/prediction/image/image_plan.md new file mode 100644 index 0000000..0a14368 --- /dev/null +++ b/prediction/image/image_plan.md @@ -0,0 +1,238 @@ +# 이미지 업로드 유류 분석 기능 구현 계획 + +## Context + +드론/항공 촬영 이미지를 업로드하면 AI 세그멘테이션으로 유류 확산 정보(위치·유종·면적·부피)를 자동 추출하고, 결과를 DB에 저장한 뒤 예측정보 입력 폼에 자동 채워주는 기능이다. +이미지 분석 서버(`prediction/image/api.py`, FastAPI, 포트 5001)는 이미 구현되어 있으며, 프론트↔백엔드↔이미지 분석 서버 연동 및 결과 자동 채우기를 구현한다. + +--- + +## 전체 흐름 + +``` +[프론트] 이미지 선택 → 분석 요청 버튼 + ↓ POST /api/prediction/image-analyze (multipart: image) +[백엔드] + ├─ fileId = UUID 생성 + ├─ camTy = "mx15hdi" (하드코딩, 추후 이미지 EXIF 카메라 정보로 자동 판별 예정) + ├─ 이미지 분석 서버로 전달 POST http://IMAGE_API_URL/run-script/ + ├─ 응답 파싱: meta(위경도 DMS→십진수 변환), data[0].classId→유종 + ├─ ACDNT INSERT (lat/lon/임시사고명) + ├─ SPIL_DATA INSERT (유종/면적/img_rslt_data JSONB) + └─ 응답: { acdntSn, lat, lon, oilType, area, volume } + ↓ +[프론트] 폼 자동 채우기 (좌표·유종·유출량) + → 사용자가 나머지 입력 후 "확산예측 실행" +``` + +--- + +## 구현 단계 + +### Step 1 — DB 마이그레이션 (`database/migration/017_spil_img_rslt.sql`) + +`SPIL_DATA` 테이블에 이미지 분석 결과 컬럼 추가. + +```sql +ALTER TABLE wing.spil_data + ADD COLUMN IF NOT EXISTS img_rslt_data JSONB; +``` + +--- + +### Step 2 — 백엔드: 이미지 분석 엔드포인트 + +**파일**: `backend/src/prediction/predictionRouter.ts` (라우트 등록) +**신규 파일**: `backend/src/prediction/imageAnalyzeService.ts` + +#### 엔드포인트 + +``` +POST /api/prediction/image-analyze +Content-Type: multipart/form-data +Body: image (file) +``` + +#### `imageAnalyzeService.ts` 핵심 로직 + +```typescript +// 1. fileId 생성 (crypto.randomUUID) + +// 2. 이미지 분석 서버 호출 +// camTy는 현재 "mx15hdi"로 하드코딩한다. +// TODO: 추후 이미지 EXIF에서 카메라 모델명을 읽어 camTy를 자동 판별하는 로직을 +// 이미지 분석 서버(api.py)에 추가할 예정이다. (check_camera_info 함수 활용) +// FormData: { camTy: 'mx15hdi', fileId, image } +// → POST ${IMAGE_API_URL}/run-script/ +// 응답: { meta: string, data: OilPolygon[] } + +// 3. meta 문자열 파싱 (mx15hdi CSV 컬럼 순서 사용) +// [Filename, Tlat_d, Tlat_m, Tlat_s, Tlon_d, Tlon_m, Tlon_s, ...] +// DMS → 십진수: d + m/60 + s/3600 + +// 4. 유종 매핑 (data[0].classId → UI 유종명) +// classId → oilType: { '검정': '벙커C유', '갈색': '벙커C유', '무지개': '경유', '은색': '등유' } + +// 5. ACDNT INSERT (임시 사고명 = "이미지분석_YYYY-MM-DD HH:mm", lat, lon, occurredAt = 촬영시각) +// 6. SPIL_DATA INSERT (acdntSn, matTyCd, matVol=data[0].volume, imgRsltData=JSON.stringify(response)) + +// 7. 
diff --git a/prediction/image/image_plan.md b/prediction/image/image_plan.md
new file mode 100644
index 0000000..0a14368
--- /dev/null
+++ b/prediction/image/image_plan.md
@@ -0,0 +1,238 @@
+# Image-Upload Oil Analysis Feature: Implementation Plan
+
+## Context
+
+When a drone/aerial image is uploaded, AI segmentation automatically extracts the oil-spread information (location, oil type, area, volume), stores the result in the DB, and auto-fills the prediction input form.
+The image analysis server (`prediction/image/api.py`, FastAPI, port 5001) is already implemented; this plan covers the frontend ↔ backend ↔ image-analysis-server integration and the auto-fill of results.
+
+---
+
+## Overall Flow
+
+```
+[Frontend] select image → "request analysis" button
+    ↓ POST /api/prediction/image-analyze (multipart: image)
+[Backend]
+    ├─ generate fileId (UUID)
+    ├─ camTy = "mx15hdi" (hard-coded; later to be auto-detected from the image's EXIF camera info)
+    ├─ forward to the image analysis server: POST http://IMAGE_API_URL/run-script/
+    ├─ parse response: meta (lat/lon DMS → decimal), data[0].classId → oil type
+    ├─ INSERT into ACDNT (lat/lon/temporary incident name)
+    ├─ INSERT into SPIL_DATA (oil type/area/img_rslt_data JSONB)
+    └─ respond: { acdntSn, lat, lon, oilType, area, volume }
+    ↓
+[Frontend] auto-fill the form (coordinates, oil type, spill volume)
+    → user enters the remaining fields, then runs the spill prediction
+```
+
+---
+
+## Implementation Steps
+
+### Step 1 — DB migration (`database/migration/017_spil_img_rslt.sql`)
+
+Add an image-analysis result column to the `SPIL_DATA` table.
+
+```sql
+ALTER TABLE wing.spil_data
+    ADD COLUMN IF NOT EXISTS img_rslt_data JSONB;
+```
+
+---
+
+### Step 2 — Backend: image analysis endpoint
+
+**File**: `backend/src/prediction/predictionRouter.ts` (route registration)
+**New file**: `backend/src/prediction/imageAnalyzeService.ts`
+
+#### Endpoint
+
+```
+POST /api/prediction/image-analyze
+Content-Type: multipart/form-data
+Body: image (file)
+```
+
+#### Core logic of `imageAnalyzeService.ts`
+
+```typescript
+// 1. Generate fileId (crypto.randomUUID)
+
+// 2. Call the image analysis server
+//    camTy is hard-coded to "mx15hdi" for now.
+//    TODO: later, read the camera model from the image EXIF and auto-detect camTy
+//    on the image analysis server (api.py) side (using its check_camera_info helper).
+//    FormData: { camTy: 'mx15hdi', fileId, image }
+//    → POST ${IMAGE_API_URL}/run-script/
+//    Response: { meta: string, data: OilPolygon[] }
+
+// 3. Parse the meta string (mx15hdi CSV column order)
+//    [Filename, Tlat_d, Tlat_m, Tlat_s, Tlon_d, Tlon_m, Tlon_s, ...]
+//    DMS → decimal: d + m/60 + s/3600
+
+// 4. Map the oil type (data[0].classId → UI oil-type name)
+//    classId → oilType: { '검정': '벙커C유', '갈색': '벙커C유', '무지개': '경유', '은색': '등유' }
+
+// 5. INSERT into ACDNT (temporary incident name = "이미지분석_YYYY-MM-DD HH:mm", lat, lon, occurredAt = capture time)
+// 6. INSERT into SPIL_DATA (acdntSn, matTyCd, matVol = data[0].volume, imgRsltData = JSON.stringify(response))
+
+// 7. Return value
+interface ImageAnalyzeResult {
+  acdntSn: number;
+  lat: number;
+  lon: number;
+  oilType: string;  // UI oil-type name (벙커C유 etc.)
+  area: number;     // m²
+  volume: number;   // m³
+  fileId: string;
+}
+```
+
+#### New environment variable (`backend/.env`)
+
+```
+IMAGE_API_URL=http://localhost:5001
+```
+
+#### Error handling
+
+| Condition | Response |
+|------|------|
+| image has no GPS EXIF | 422 `{ error: 'GPS_NOT_FOUND' }` |
+| image server timeout (300 s) | 504 |
+
+---
+
+### Step 3 — Frontend: API service
+
+**File**: `frontend/src/tabs/prediction/services/predictionApi.ts`
+
+```typescript
+interface ImageAnalyzeResult {
+  acdntSn: number;
+  lat: number;
+  lon: number;
+  oilType: string;
+  area: number;
+  volume: number;
+  fileId: string;
+}
+
+export const analyzeImage = async (
+  file: File
+): Promise<ImageAnalyzeResult> => {
+  const formData = new FormData();
+  formData.append('image', file);
+  const { data } = await api.post(
+    '/prediction/image-analyze',
+    formData,
+    { headers: { 'Content-Type': 'multipart/form-data' }, timeout: 330000 }
+  );
+  return data;
+};
+```
+
+---
+
+### Step 4 — Frontend: extend the Props type
+
+**File**: `frontend/src/tabs/prediction/components/leftPanelTypes.ts`
+
+```typescript
+// add to the existing Props
+onImageAnalysisResult?: (result: ImageAnalyzeResult) => void;
+```
+
+---
+
+### Step 5 — Frontend: update PredictionInputSection
+
+**File**: `frontend/src/tabs/prediction/components/PredictionInputSection.tsx`
+
+#### Changes
+
+1. **"Run image analysis" button** (enabled once an image is selected)
+   - on click, call `analyzeImage(file)` (camTy is handled by the backend)
+   - show a loading spinner (analysis takes tens of seconds to several minutes)
+
+2. **Show the analysis result** (on success)
+   - summary message: "분석 완료: 위도 XX.XXXX / 경도 XXX.XXXX / 유종: OO"
+
+3. **Invoke the `onImageAnalysisResult` callback**
+   - pass the result up to the parent on success
+
+4. **Error handling**
+   - GPS_NOT_FOUND: show the "GPS 정보가 없는 이미지입니다" (image has no GPS info) message
+   - timeout: show the "분석 서버 응답 없음" (analysis server not responding) message
+
+5. **Replace local state**: drop `uploadedImage` (Base64 DataURL) in favor of `uploadedFile: File | null`
+
+---
+
+### Step 6 — Frontend: handle the result in OilSpillView
+
+**File**: `frontend/src/tabs/prediction/components/OilSpillView.tsx`
+
+```typescript
+const handleImageAnalysisResult = useCallback((result: ImageAnalyzeResult) => {
+  // 1. Auto-fill the incident coordinates
+  setIncidentCoord({ lat: result.lat, lon: result.lon })
+  setFlyToCoord({ lat: result.lat, lon: result.lon })
+
+  // 2. Auto-fill oil type / spill volume
+  setOilType(result.oilType)
+  setSpillAmount(parseFloat(result.volume.toFixed(4)))
+  setSpillUnit('m³')
+
+  // 3. Refresh the selected-analysis state (link acdntSn — the simulation run reuses the existing incident)
+  setSelectedAnalysis({
+    acdntSn: result.acdntSn,
+    acdntNm: '',
+    // ... remaining defaults
+  })
+}, [])
+```
+
+Pass `onImageAnalysisResult={handleImageAnalysisResult}` to `LeftPanel`.
+
+---
+
+## Summary of Files to Change
+
+| File | Change type |
+|------|---------|
+| `database/migration/017_spil_img_rslt.sql` | **new** — add SPIL_DATA column |
+| `backend/src/prediction/imageAnalyzeService.ts` | **new** — image analysis service |
+| `backend/src/prediction/predictionRouter.ts` | **modified** — add route |
+| `backend/.env` | **modified** — add IMAGE_API_URL |
+| `frontend/src/tabs/prediction/services/predictionApi.ts` | **modified** — add analyzeImage |
+| `frontend/src/tabs/prediction/components/leftPanelTypes.ts` | **modified** — add Props type |
+| `frontend/src/tabs/prediction/components/PredictionInputSection.tsx` | **modified** — analysis-run UI |
+| `frontend/src/tabs/prediction/components/OilSpillView.tsx` | **modified** — result handler |
+
+---
+
+## Verification
+
+1. **Test the image analysis server directly**
+   ```bash
+   curl -X POST http://localhost:5001/run-script/ \
+     -F "camTy=mx15hdi" -F "fileId=test001" -F "image=@drone_image.jpg"
+   ```
+
+2. **Test the backend endpoint**
+   ```bash
+   curl -X POST http://localhost:3001/api/prediction/image-analyze \
+     -F "image=@drone_image.jpg" \
+     -H "Cookie: "
+   ```
+   - response: `{ acdntSn, lat, lon, oilType, area, volume, fileId }`
+   - check the DB: ACDNT and SPIL_DATA records created (see the sketch after this list)
+
+3. **Frontend E2E test**
+   - choose image-upload mode → upload an image with GPS EXIF → click "run image analysis"
+   - loading indicator shown → on completion: map flies to the location, oil type/coordinate fields are auto-filled
+   - enter the remaining fields (prediction time, spill duration, etc.) and run the spill prediction → simulation runs normally
+
+4. **Error cases**
+   - image without GPS → "GPS 정보가 없는 이미지입니다" message
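A hedged helper for the DB check in verification step 2: `wing.spil_data.img_rslt_data` comes from the Step 1 migration, while the `acdnt_sn` column name and the connection settings are assumptions to adjust against the real schema.

```python
# Illustrative verification helper only; not part of the implementation.
import json
import psycopg2

def check_analysis_rows(acdnt_sn: int) -> None:
    conn = psycopg2.connect("dbname=wing")  # connection string is a placeholder
    with conn, conn.cursor() as cur:
        cur.execute(
            "SELECT img_rslt_data FROM wing.spil_data WHERE acdnt_sn = %s",
            (acdnt_sn,),
        )
        row = cur.fetchone()
        assert row is not None, "no SPIL_DATA record for this acdnt_sn"
        # img_rslt_data holds the raw { meta, data } response from the image server
        print(json.dumps(row[0], ensure_ascii=False, indent=2))
```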
diff --git a/prediction/image/mx15hdi/Detect/Inference.py b/prediction/image/mx15hdi/Detect/Inference.py
new file mode 100644
index 0000000..52aa18f
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/Inference.py
@@ -0,0 +1,131 @@
+import os, mmcv, cv2, json
+import numpy as np
+import torch
+from pathlib import Path
+from PIL import Image
+from tqdm import tqdm
+from mmseg.apis import init_segmentor, inference_segmentor
+from shapely.geometry import Polygon, mapping
+import sys
+
+_DETECT_DIR = Path(__file__).parent    # mx15hdi/Detect/
+_MX15HDI_DIR = _DETECT_DIR.parent      # mx15hdi/
+
+
+def load_model():
+    """Called once at server start-up. Returns the loaded model object."""
+    # Priority: DEVICE env var > GPU auto-detection > CPU fallback
+    env_device = os.environ.get('DEVICE', '').strip()
+    if env_device:
+        device = env_device
+    elif torch.cuda.is_available():
+        device = 'cuda:0'
+    else:
+        device = 'cpu'
+    print(f'[Inference] using device: {device}')
+
+    config = str(_DETECT_DIR / 'V7_SPECIAL.py')
+    checkpoint = str(_DETECT_DIR / 'epoch_165.pth')
+    model = init_segmentor(config, checkpoint, device=device)
+    model.PALETTE = [
+        [0, 0, 0],         # background
+        [0, 0, 204],       # black
+        [180, 180, 180],   # brown
+        [255, 255, 0],     # rainbow
+        [178, 102, 255]    # silver
+    ]
+    return model
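+
+# Usage sketch (illustrative; the actual wiring lives in api.py):
+#   model = load_model()                    # once, at server start-up
+#   cache = run_inference(model, file_id)   # per request, no model reload
+#   ...then pass `cache` on to the georeference step (see run_inference below)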
+ """ + img_path = str(_MX15HDI_DIR / 'Metadata' / 'Image' / 'Original_Images' / file_id) + output_folder = str(_DETECT_DIR / 'result' / file_id) + mask_folder = str(_DETECT_DIR / 'Mask_result' / file_id) + + if not os.path.exists(img_path): + raise FileNotFoundError(f"이미지 폴더가 존재하지 않습니다: {img_path}") + + if write_files: + os.makedirs(output_folder, exist_ok=True) + os.makedirs(mask_folder, exist_ok=True) + + image_files = [ + f for f in os.listdir(img_path) + if f.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff')) + ] + + # palette_array는 이미지마다 동일하므로 루프 외부에서 1회 생성 + palette_array = np.array(model.PALETTE, dtype=np.uint8) + + inference_cache = {} + + for image_file in tqdm(image_files, desc="Processing images"): + image_path = os.path.join(img_path, image_file) + image_name, image_ext = os.path.splitext(image_file) + image_ext = image_ext.lower() + + # 이미지를 1회만 읽어 inference와 blending 모두에 재사용 + img_bgr = cv2.imread(image_path) + img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB) + + # 이미 로드된 배열을 inference_segmentor에 전달 (경로 전달 시 내부에서 재읽기 발생) + result = inference_segmentor(model, img_bgr) + seg_map = result[0] + + # Create color mask from palette + color_mask = palette_array[seg_map] + + # blended image + blended = blend_images(img_rgb, color_mask, alpha=0.6) + blended_bgr = cv2.cvtColor(blended, cv2.COLOR_RGB2BGR) + + # mask — numpy 슬라이싱으로 cv2.cvtColor 호출 1회 제거 + mask_bgr = color_mask[:, :, ::-1].copy() + + # 결과를 메모리 캐시에 저장 (georeference 단계에서 재사용) + # mask는 palette 원본(RGB) 그대로 저장 — Oilshape의 class_colors가 RGB 기준이므로 BGR로 저장 시 색상 매칭 실패 + inference_cache[image_file] = { + 'blended': blended_bgr, + 'mask': color_mask, + 'ext': image_ext, + } + + if write_files: + cv2.imwrite( + os.path.join(output_folder, f"{image_name}{image_ext}"), + blended_bgr, + [cv2.IMWRITE_JPEG_QUALITY, 85] + ) + cv2.imwrite(os.path.join(mask_folder, f"{image_name}{image_ext}"), mask_bgr) + + return inference_cache + + +if __name__ == '__main__': + if len(sys.argv) < 2: + raise ValueError("파라미터가 제공되지 않았습니다. 
diff --git a/prediction/image/mx15hdi/Detect/V7_SPECIAL.py b/prediction/image/mx15hdi/Detect/V7_SPECIAL.py
new file mode 100644
index 0000000..7a3ff9e
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/V7_SPECIAL.py
@@ -0,0 +1,196 @@
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    pretrained='open-mmlab://resnet101_v1c',
+    backbone=dict(
+        type='ResNetV1c',
+        depth=101,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        dilations=(1, 1, 2, 4),
+        strides=(1, 2, 1, 1),
+        norm_cfg=dict(type='SyncBN', requires_grad=True),
+        norm_eval=False,
+        style='pytorch',
+        contract_dilation=True),
+    decode_head=dict(
+        type='DAHead',
+        in_channels=2048,
+        in_index=3,
+        channels=512,
+        pam_channels=64,
+        dropout_ratio=0.1,
+        num_classes=5,
+        norm_cfg=dict(type='SyncBN', requires_grad=True),
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+    auxiliary_head=dict(
+        type='FCNHead',
+        in_channels=1024,
+        in_index=2,
+        channels=256,
+        num_convs=1,
+        concat_input=False,
+        dropout_ratio=0.1,
+        num_classes=5,
+        norm_cfg=dict(type='SyncBN', requires_grad=True),
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
+dataset_type = 'CustomDataset'
+data_root = 'data/my_dataset_v7'
+img_norm_cfg = dict(
+    mean=[119.54541993, 107.13545011, 96.71320316],
+    std=[60.3273945, 56.33692515, 55.71005772],
+    to_rgb=True)
+crop_size = (512, 512)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(type='Resize', img_scale=(512, 512)),
+    dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(
+        type='Normalize',
+        mean=[119.54541993, 107.13545011, 96.71320316],
+        std=[60.3273945, 56.33692515, 55.71005772],
+        to_rgb=True),
+    dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(512, 512),
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(
+                type='Normalize',
+                mean=[119.54541993, 107.13545011, 96.71320316],
+                std=[60.3273945, 56.33692515, 55.71005772],
+                to_rgb=True),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img'])
+        ])
+]
+data = dict(
+    samples_per_gpu=4,
+    workers_per_gpu=4,
+    train=dict(
+        type='CustomDataset',
+        data_root='data/my_dataset_v7',
+        img_dir='img_dir/train',
+        ann_dir='ann_dir/train',
+        pipeline=[
+            dict(type='LoadImageFromFile'),
+            dict(type='LoadAnnotations'),
+            dict(type='Resize', img_scale=(512, 512)),
+            dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
+            dict(type='RandomFlip', flip_ratio=0.5),
+            dict(type='PhotoMetricDistortion'),
+            dict(
+                type='Normalize',
+                mean=[119.54541993, 107.13545011, 96.71320316],
+                std=[60.3273945, 56.33692515, 55.71005772],
+                to_rgb=True),
+            dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
+            dict(type='DefaultFormatBundle'),
+            dict(type='Collect', keys=['img', 'gt_semantic_seg'])
+        ]),
+    val=dict(
+        type='CustomDataset',
+        data_root='data/my_dataset_v7',
+        img_dir='img_dir/val',
ann_dir='ann_dir/val', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[119.54541993, 107.13545011, 96.71320316], + std=[60.3273945, 56.33692515, 55.71005772], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ]), + test=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/test', + ann_dir='ann_dir/test', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[119.54541993, 107.13545011, 96.71320316], + std=[60.3273945, 56.33692515, 55.71005772], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ], + split=None, + img_suffix='.png', + seg_map_suffix='.png')) +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +#workflow = [('train', 1), ('val', 1)] +workflow = [('test', 1)] +cudnn_benchmark = True +optimizer = dict( + type='AdamW', + lr=3e-05, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys=dict( + pos_block=dict(decay_mult=0.0), + norm=dict(decay_mult=0.0), + head=dict(lr_mult=10.0)))) +optimizer_config = dict() +lr_config = dict( + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-06, + power=1.0, + min_lr=0.0, + by_epoch=False) +runner = dict(type='EpochBasedRunner', max_epochs=200) +checkpoint_config = dict(by_epoch=True, interval=1) +evaluation = dict(by_epoch=True, interval=1, metric='mIoU') +log_config = dict( + interval=1000, + hooks=[ + dict(type='TextLoggerHook'), + dict( + type='WandbLoggerHook', + init_kwargs=dict(project='Oil_Spill', name='V7_SPECIAL')) + ]) +auto_resume = False +work_dir = 'work_dirs/V7_SPECIAL' +gpu_ids = [0] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/.circleci/config.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/.circleci/config.yml new file mode 100644 index 0000000..9456918 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/.circleci/config.yml @@ -0,0 +1,161 @@ +version: 2.1 + +jobs: + lint: + docker: + - image: cimg/python:3.7.4 + steps: + - checkout + - run: + name: Install dependencies + command: | + sudo apt-add-repository ppa:brightbox/ruby-ng -y + sudo apt-get update + sudo apt-get install -y ruby2.7 + - run: + name: Install pre-commit hook + command: | + pip install pre-commit + pre-commit install + - run: + name: Linting + command: pre-commit run --all-files + - run: + name: Check docstring coverage + command: | + pip install interrogate + interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --ignore-regex "__repr__" --fail-under 50 mmseg + + build_cpu: + parameters: + # The python version must match available image tags in + # https://circleci.com/developer/images/image/cimg/python + python: + type: string + default: "3.7.4" + torch: + type: string + torchvision: + type: string + docker: + - image: cimg/python:<< parameters.python >> + resource_class: large + steps: + - checkout + - run: + name: Install Libraries + command: | + sudo apt-get update + sudo apt-get install -y ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 libgl1-mesa-glx libjpeg-dev zlib1g-dev libtinfo-dev 
libncurses5 + - run: + name: Configure Python & pip + command: | + python -m pip install --upgrade pip + python -m pip install wheel + - run: + name: Install PyTorch + command: | + python -V + python -m pip install torch==<< parameters.torch >>+cpu torchvision==<< parameters.torchvision >>+cpu -f https://download.pytorch.org/whl/torch_stable.html + - run: + name: Install mmseg dependencies + command: | + python -m pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cpu/torch<< parameters.torch >>/index.html + python -m pip install mmdet + python -m pip install -r requirements.txt + - run: + name: Build and install + command: | + python -m pip install -e . + - run: + name: Run unittests + command: | + python -m pip install timm + python -m coverage run --branch --source mmseg -m pytest tests/ + python -m coverage xml + python -m coverage report -m + + build_cu101: + machine: + image: ubuntu-1604-cuda-10.1:201909-23 + resource_class: gpu.nvidia.small + steps: + - checkout + - run: + name: Install Libraries + command: | + sudo apt-get update + sudo apt-get install -y git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 libgl1-mesa-glx + - run: + name: Configure Python & pip + command: | + pyenv global 3.7.0 + python -m pip install --upgrade pip + python -m pip install wheel + - run: + name: Install PyTorch + command: | + python -V + python -m pip install torch==1.6.0+cu101 torchvision==0.7.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html + - run: + name: Install mmseg dependencies + # python -m pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu101/torch${{matrix.torch_version}}/index.html + command: | + python -m pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.6.0/index.html + python -m pip install mmdet + python -m pip install -r requirements.txt + - run: + name: Build and install + command: | + python setup.py check -m -s + TORCH_CUDA_ARCH_LIST=7.0 python -m pip install -e . 
+ - run: + name: Run unittests + command: | + python -m pip install timm + python -m pytest tests/ + +workflows: + unit_tests: + jobs: + - lint + - build_cpu: + name: build_cpu_th1.6 + torch: 1.6.0 + torchvision: 0.7.0 + requires: + - lint + - build_cpu: + name: build_cpu_th1.7 + torch: 1.7.0 + torchvision: 0.8.1 + requires: + - lint + - build_cpu: + name: build_cpu_th1.8_py3.9 + torch: 1.8.0 + torchvision: 0.9.0 + python: "3.9.0" + requires: + - lint + - build_cpu: + name: build_cpu_th1.9_py3.8 + torch: 1.9.0 + torchvision: 0.10.0 + python: "3.8.0" + requires: + - lint + - build_cpu: + name: build_cpu_th1.9_py3.9 + torch: 1.9.0 + torchvision: 0.10.0 + python: "3.9.0" + requires: + - lint + - build_cu101: + requires: + - build_cpu_th1.6 + - build_cpu_th1.7 + - build_cpu_th1.8_py3.9 + - build_cpu_th1.9_py3.8 + - build_cpu_th1.9_py3.9 diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/batch_test_list.py b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/batch_test_list.py new file mode 100644 index 0000000..c4fd8f9 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/batch_test_list.py @@ -0,0 +1,133 @@ +# yapf: disable +# Inference Speed is tested on NVIDIA V100 +hrnet = [ + dict( + config='configs/hrnet/fcn_hr18s_512x512_160k_ade20k.py', + checkpoint='fcn_hr18s_512x512_160k_ade20k_20200614_214413-870f65ac.pth', # noqa + eval='mIoU', + metric=dict(mIoU=33.0), + ), + dict( + config='configs/hrnet/fcn_hr18s_512x1024_160k_cityscapes.py', + checkpoint='fcn_hr18s_512x1024_160k_cityscapes_20200602_190901-4a0797ea.pth', # noqa + eval='mIoU', + metric=dict(mIoU=76.31), + ), + dict( + config='configs/hrnet/fcn_hr48_512x512_160k_ade20k.py', + checkpoint='fcn_hr48_512x512_160k_ade20k_20200614_214407-a52fc02c.pth', + eval='mIoU', + metric=dict(mIoU=42.02), + ), + dict( + config='configs/hrnet/fcn_hr48_512x1024_160k_cityscapes.py', + checkpoint='fcn_hr48_512x1024_160k_cityscapes_20200602_190946-59b7973e.pth', # noqa + eval='mIoU', + metric=dict(mIoU=80.65), + ), +] +pspnet = [ + dict( + config='configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py', + checkpoint='pspnet_r50-d8_512x1024_80k_cityscapes_20200606_112131-2376f12b.pth', # noqa + eval='mIoU', + metric=dict(mIoU=78.55), + ), + dict( + config='configs/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py', + checkpoint='pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211-e1e1100f.pth', # noqa + eval='mIoU', + metric=dict(mIoU=79.76), + ), + dict( + config='configs/pspnet/pspnet_r101-d8_512x512_160k_ade20k.py', + checkpoint='pspnet_r101-d8_512x512_160k_ade20k_20200615_100650-967c316f.pth', # noqa + eval='mIoU', + metric=dict(mIoU=44.39), + ), + dict( + config='configs/pspnet/pspnet_r50-d8_512x512_160k_ade20k.py', + checkpoint='pspnet_r50-d8_512x512_160k_ade20k_20200615_184358-1890b0bd.pth', # noqa + eval='mIoU', + metric=dict(mIoU=42.48), + ), +] +resnest = [ + dict( + config='configs/resnest/pspnet_s101-d8_512x512_160k_ade20k.py', + checkpoint='pspnet_s101-d8_512x512_160k_ade20k_20200807_145416-a6daa92a.pth', # noqa + eval='mIoU', + metric=dict(mIoU=45.44), + ), + dict( + config='configs/resnest/pspnet_s101-d8_512x1024_80k_cityscapes.py', + checkpoint='pspnet_s101-d8_512x1024_80k_cityscapes_20200807_140631-c75f3b99.pth', # noqa + eval='mIoU', + metric=dict(mIoU=78.57), + ), +] +fastscnn = [ + dict( + config='configs/fastscnn/fast_scnn_lr0.12_8x4_160k_cityscapes.py', + checkpoint='fast_scnn_8x4_160k_lr0.12_cityscapes-0cec9937.pth', + eval='mIoU', + metric=dict(mIoU=70.96), + ) +] +deeplabv3plus = [ + dict( + 
config='configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes.py', # noqa + checkpoint='deeplabv3plus_r101-d8_769x769_80k_cityscapes_20200607_000405-a7573d20.pth', # noqa + eval='mIoU', + metric=dict(mIoU=80.98), + ), + dict( + config='configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py', # noqa + checkpoint='deeplabv3plus_r101-d8_512x1024_80k_cityscapes_20200606_114143-068fcfe9.pth', # noqa + eval='mIoU', + metric=dict(mIoU=80.97), + ), + dict( + config='configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py', # noqa + checkpoint='deeplabv3plus_r50-d8_512x1024_80k_cityscapes_20200606_114049-f9fb496d.pth', # noqa + eval='mIoU', + metric=dict(mIoU=80.09), + ), + dict( + config='configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes.py', # noqa + checkpoint='deeplabv3plus_r50-d8_769x769_80k_cityscapes_20200606_210233-0e9dfdc4.pth', # noqa + eval='mIoU', + metric=dict(mIoU=79.83), + ), +] +vit = [ + dict( + config='configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py', + checkpoint='upernet_vit-b16_ln_mln_512x512_160k_ade20k-f444c077.pth', + eval='mIoU', + metric=dict(mIoU=47.73), + ), + dict( + config='configs/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k.py', + checkpoint='upernet_deit-s16_ln_mln_512x512_160k_ade20k-c0cd652f.pth', + eval='mIoU', + metric=dict(mIoU=43.52), + ), +] +fp16 = [ + dict( + config='configs/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes.py', # noqa + checkpoint='deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920-f1104f4b.pth', # noqa + eval='mIoU', + metric=dict(mIoU=80.46), + ) +] +swin = [ + dict( + config='configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py', # noqa + checkpoint='upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210531_112542-e380ad3e.pth', # noqa + eval='mIoU', + metric=dict(mIoU=44.41), + ) +] +# yapf: enable diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/batch_train_list.txt b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/batch_train_list.txt new file mode 100644 index 0000000..17d1993 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/batch_train_list.txt @@ -0,0 +1,19 @@ +configs/hrnet/fcn_hr18s_512x512_160k_ade20k.py +configs/hrnet/fcn_hr18s_512x1024_160k_cityscapes.py +configs/hrnet/fcn_hr48_512x512_160k_ade20k.py +configs/hrnet/fcn_hr48_512x1024_160k_cityscapes.py +configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py +configs/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py +configs/pspnet/pspnet_r101-d8_512x512_160k_ade20k.py +configs/pspnet/pspnet_r50-d8_512x512_160k_ade20k.py +configs/resnest/pspnet_s101-d8_512x512_160k_ade20k.py +configs/resnest/pspnet_s101-d8_512x1024_80k_cityscapes.py +configs/fastscnn/fast_scnn_lr0.12_8x4_160k_cityscapes.py +configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes.py +configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py +configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py +configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes.py +configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py +configs/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k.py +configs/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes.py +configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/benchmark_evaluation.sh 
b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/benchmark_evaluation.sh new file mode 100644 index 0000000..68dc272 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/benchmark_evaluation.sh @@ -0,0 +1,41 @@ +PARTITION=$1 +CHECKPOINT_DIR=$2 + +echo 'configs/hrnet/fcn_hr18s_512x512_160k_ade20k.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION fcn_hr18s_512x512_160k_ade20k configs/hrnet/fcn_hr18s_512x512_160k_ade20k.py $CHECKPOINT_DIR/fcn_hr18s_512x512_160k_ade20k_20200614_214413-870f65ac.pth --eval mIoU --work-dir work_dirs/benchmark_evaluation/fcn_hr18s_512x512_160k_ade20k --cfg-options dist_params.port=28171 & +echo 'configs/hrnet/fcn_hr18s_512x1024_160k_cityscapes.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION fcn_hr18s_512x1024_160k_cityscapes configs/hrnet/fcn_hr18s_512x1024_160k_cityscapes.py $CHECKPOINT_DIR/fcn_hr18s_512x1024_160k_cityscapes_20200602_190901-4a0797ea.pth --eval mIoU --work-dir work_dirs/benchmark_evaluation/fcn_hr18s_512x1024_160k_cityscapes --cfg-options dist_params.port=28172 & +echo 'configs/hrnet/fcn_hr48_512x512_160k_ade20k.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION fcn_hr48_512x512_160k_ade20k configs/hrnet/fcn_hr48_512x512_160k_ade20k.py $CHECKPOINT_DIR/fcn_hr48_512x512_160k_ade20k_20200614_214407-a52fc02c.pth --eval mIoU --work-dir work_dirs/benchmark_evaluation/fcn_hr48_512x512_160k_ade20k --cfg-options dist_params.port=28173 & +echo 'configs/hrnet/fcn_hr48_512x1024_160k_cityscapes.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION fcn_hr48_512x1024_160k_cityscapes configs/hrnet/fcn_hr48_512x1024_160k_cityscapes.py $CHECKPOINT_DIR/fcn_hr48_512x1024_160k_cityscapes_20200602_190946-59b7973e.pth --eval mIoU --work-dir work_dirs/benchmark_evaluation/fcn_hr48_512x1024_160k_cityscapes --cfg-options dist_params.port=28174 & +echo 'configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION pspnet_r50-d8_512x1024_80k_cityscapes configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py $CHECKPOINT_DIR/pspnet_r50-d8_512x1024_80k_cityscapes_20200606_112131-2376f12b.pth --eval mIoU --work-dir work_dirs/benchmark_evaluation/pspnet_r50-d8_512x1024_80k_cityscapes --cfg-options dist_params.port=28175 & +echo 'configs/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION pspnet_r101-d8_512x1024_80k_cityscapes configs/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py $CHECKPOINT_DIR/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211-e1e1100f.pth --eval mIoU --work-dir work_dirs/benchmark_evaluation/pspnet_r101-d8_512x1024_80k_cityscapes --cfg-options dist_params.port=28176 & +echo 'configs/pspnet/pspnet_r101-d8_512x512_160k_ade20k.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION pspnet_r101-d8_512x512_160k_ade20k configs/pspnet/pspnet_r101-d8_512x512_160k_ade20k.py $CHECKPOINT_DIR/pspnet_r101-d8_512x512_160k_ade20k_20200615_100650-967c316f.pth --eval mIoU --work-dir work_dirs/benchmark_evaluation/pspnet_r101-d8_512x512_160k_ade20k --cfg-options dist_params.port=28177 & +echo 'configs/pspnet/pspnet_r50-d8_512x512_160k_ade20k.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION pspnet_r50-d8_512x512_160k_ade20k configs/pspnet/pspnet_r50-d8_512x512_160k_ade20k.py $CHECKPOINT_DIR/pspnet_r50-d8_512x512_160k_ade20k_20200615_184358-1890b0bd.pth --eval 
mIoU --work-dir work_dirs/benchmark_evaluation/pspnet_r50-d8_512x512_160k_ade20k --cfg-options dist_params.port=28178 & +echo 'configs/resnest/pspnet_s101-d8_512x512_160k_ade20k.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION pspnet_s101-d8_512x512_160k_ade20k configs/resnest/pspnet_s101-d8_512x512_160k_ade20k.py $CHECKPOINT_DIR/pspnet_s101-d8_512x512_160k_ade20k_20200807_145416-a6daa92a.pth --eval mIoU --work-dir work_dirs/benchmark_evaluation/pspnet_s101-d8_512x512_160k_ade20k --cfg-options dist_params.port=28179 & +echo 'configs/resnest/pspnet_s101-d8_512x1024_80k_cityscapes.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION pspnet_s101-d8_512x1024_80k_cityscapes configs/resnest/pspnet_s101-d8_512x1024_80k_cityscapes.py $CHECKPOINT_DIR/pspnet_s101-d8_512x1024_80k_cityscapes_20200807_140631-c75f3b99.pth --eval mIoU --work-dir work_dirs/benchmark_evaluation/pspnet_s101-d8_512x1024_80k_cityscapes --cfg-options dist_params.port=28180 & +echo 'configs/fastscnn/fast_scnn_lr0.12_8x4_160k_cityscapes.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION fast_scnn_lr0.12_8x4_160k_cityscapes configs/fastscnn/fast_scnn_lr0.12_8x4_160k_cityscapes.py $CHECKPOINT_DIR/fast_scnn_8x4_160k_lr0.12_cityscapes-0cec9937.pth --eval mIoU --work-dir work_dirs/benchmark_evaluation/fast_scnn_lr0.12_8x4_160k_cityscapes --cfg-options dist_params.port=28181 & +echo 'configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION deeplabv3plus_r101-d8_769x769_80k_cityscapes configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes.py $CHECKPOINT_DIR/deeplabv3plus_r101-d8_769x769_80k_cityscapes_20200607_000405-a7573d20.pth --eval mIoU --work-dir work_dirs/benchmark_evaluation/deeplabv3plus_r101-d8_769x769_80k_cityscapes --cfg-options dist_params.port=28182 & +echo 'configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION deeplabv3plus_r101-d8_512x1024_80k_cityscapes configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py $CHECKPOINT_DIR/deeplabv3plus_r101-d8_512x1024_80k_cityscapes_20200606_114143-068fcfe9.pth --eval mIoU --work-dir work_dirs/benchmark_evaluation/deeplabv3plus_r101-d8_512x1024_80k_cityscapes --cfg-options dist_params.port=28183 & +echo 'configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION deeplabv3plus_r50-d8_512x1024_80k_cityscapes configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py $CHECKPOINT_DIR/deeplabv3plus_r50-d8_512x1024_80k_cityscapes_20200606_114049-f9fb496d.pth --eval mIoU --work-dir work_dirs/benchmark_evaluation/deeplabv3plus_r50-d8_512x1024_80k_cityscapes --cfg-options dist_params.port=28184 & +echo 'configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION deeplabv3plus_r50-d8_769x769_80k_cityscapes configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes.py $CHECKPOINT_DIR/deeplabv3plus_r50-d8_769x769_80k_cityscapes_20200606_210233-0e9dfdc4.pth --eval mIoU --work-dir work_dirs/benchmark_evaluation/deeplabv3plus_r50-d8_769x769_80k_cityscapes --cfg-options dist_params.port=28185 & +echo 'configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION 
upernet_vit-b16_ln_mln_512x512_160k_ade20k configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py $CHECKPOINT_DIR/upernet_vit-b16_ln_mln_512x512_160k_ade20k-f444c077.pth --eval mIoU --work-dir work_dirs/benchmark_evaluation/upernet_vit-b16_ln_mln_512x512_160k_ade20k --cfg-options dist_params.port=28186 & +echo 'configs/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION upernet_deit-s16_ln_mln_512x512_160k_ade20k configs/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k.py $CHECKPOINT_DIR/upernet_deit-s16_ln_mln_512x512_160k_ade20k-c0cd652f.pth --eval mIoU --work-dir work_dirs/benchmark_evaluation/upernet_deit-s16_ln_mln_512x512_160k_ade20k --cfg-options dist_params.port=28187 & +echo 'configs/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes configs/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes.py $CHECKPOINT_DIR/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes-cc58bc8d.pth --eval mIoU --work-dir work_dirs/benchmark_evaluation/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes --cfg-options dist_params.port=28188 & +echo 'configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py $CHECKPOINT_DIR/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210531_112542-e380ad3e.pth --eval mIoU --work-dir work_dirs/benchmark_evaluation/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K --cfg-options dist_params.port=28189 & diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/benchmark_inference.py b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/benchmark_inference.py new file mode 100644 index 0000000..5124811 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/benchmark_inference.py @@ -0,0 +1,149 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import hashlib +import logging +import os +import os.path as osp +import warnings +from argparse import ArgumentParser + +import requests +from mmcv import Config + +from mmseg.apis import inference_segmentor, init_segmentor, show_result_pyplot +from mmseg.utils import get_root_logger + +# ignore warnings when segmentors inference +warnings.filterwarnings('ignore') + + +def download_checkpoint(checkpoint_name, model_name, config_name, collect_dir): + """Download checkpoint and check if hash code is true.""" + url = f'https://download.openmmlab.com/mmsegmentation/v0.5/{model_name}/{config_name}/{checkpoint_name}' # noqa + + r = requests.get(url) + assert r.status_code != 403, f'{url} Access denied.' + + with open(osp.join(collect_dir, checkpoint_name), 'wb') as code: + code.write(r.content) + + true_hash_code = osp.splitext(checkpoint_name)[0].split('-')[1] + + # check hash code + with open(osp.join(collect_dir, checkpoint_name), 'rb') as fp: + sha256_cal = hashlib.sha256() + sha256_cal.update(fp.read()) + cur_hash_code = sha256_cal.hexdigest()[:8] + + assert true_hash_code == cur_hash_code, f'{url} download failed, ' + 'incomplete downloaded file or url invalid.' 
+ + if cur_hash_code != true_hash_code: + os.remove(osp.join(collect_dir, checkpoint_name)) + + +def parse_args(): + parser = ArgumentParser() + parser.add_argument('config', help='test config file path') + parser.add_argument('checkpoint_root', help='Checkpoint file root path') + parser.add_argument( + '-i', '--img', default='demo/demo.png', help='Image file') + parser.add_argument('-a', '--aug', action='store_true', help='aug test') + parser.add_argument('-m', '--model-name', help='model name to inference') + parser.add_argument( + '-s', '--show', action='store_true', help='show results') + parser.add_argument( + '-d', '--device', default='cuda:0', help='Device used for inference') + args = parser.parse_args() + return args + + +def inference_model(config_name, checkpoint, args, logger=None): + cfg = Config.fromfile(config_name) + if args.aug: + if 'flip' in cfg.data.test.pipeline[ + 1] and 'img_scale' in cfg.data.test.pipeline[1]: + cfg.data.test.pipeline[1].img_ratios = [ + 0.5, 0.75, 1.0, 1.25, 1.5, 1.75 + ] + cfg.data.test.pipeline[1].flip = True + else: + if logger is not None: + logger.error(f'{config_name}: unable to start aug test') + else: + print(f'{config_name}: unable to start aug test', flush=True) + + model = init_segmentor(cfg, checkpoint, device=args.device) + # test a single image + result = inference_segmentor(model, args.img) + + # show the results + if args.show: + show_result_pyplot(model, args.img, result) + return result + + +# Sample test whether the inference code is correct +def main(args): + config = Config.fromfile(args.config) + + if not os.path.exists(args.checkpoint_root): + os.makedirs(args.checkpoint_root, 0o775) + + # test single model + if args.model_name: + if args.model_name in config: + model_infos = config[args.model_name] + if not isinstance(model_infos, list): + model_infos = [model_infos] + for model_info in model_infos: + config_name = model_info['config'].strip() + print(f'processing: {config_name}', flush=True) + checkpoint = osp.join(args.checkpoint_root, + model_info['checkpoint'].strip()) + try: + # build the model from a config file and a checkpoint file + inference_model(config_name, checkpoint, args) + except Exception: + print(f'{config_name} test failed!') + continue + return + else: + raise RuntimeError('model name input error.') + + # test all model + logger = get_root_logger( + log_file='benchmark_inference_image.log', log_level=logging.ERROR) + + for model_name in config: + model_infos = config[model_name] + + if not isinstance(model_infos, list): + model_infos = [model_infos] + for model_info in model_infos: + print('processing: ', model_info['config'], flush=True) + config_path = model_info['config'].strip() + config_name = osp.splitext(osp.basename(config_path))[0] + checkpoint_name = model_info['checkpoint'].strip() + checkpoint = osp.join(args.checkpoint_root, checkpoint_name) + + # ensure checkpoint exists + try: + if not osp.exists(checkpoint): + download_checkpoint(checkpoint_name, model_name, + config_name.rstrip('.py'), + args.checkpoint_root) + except Exception: + logger.error(f'{checkpoint_name} download error') + continue + + # test model inference with checkpoint + try: + # build the model from a config file and a checkpoint file + inference_model(config_path, checkpoint, args, logger) + except Exception as e: + logger.error(f'{config_path} " : {repr(e)}') + + +if __name__ == '__main__': + args = parse_args() + main(args) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/benchmark_train.sh 
b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/benchmark_train.sh new file mode 100644 index 0000000..cde47a0 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/benchmark_train.sh @@ -0,0 +1,40 @@ +PARTITION=$1 + +echo 'configs/hrnet/fcn_hr18s_512x512_160k_ade20k.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 ./tools/slurm_train.sh $PARTITION fcn_hr18s_512x512_160k_ade20k configs/hrnet/fcn_hr18s_512x512_160k_ade20k.py --cfg-options checkpoint_config.max_keep_ckpts=1 dist_params.port=24727 --work-dir work_dirs/hrnet/fcn_hr18s_512x512_160k_ade20k >/dev/null & +echo 'configs/hrnet/fcn_hr18s_512x1024_160k_cityscapes.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 ./tools/slurm_train.sh $PARTITION fcn_hr18s_512x1024_160k_cityscapes configs/hrnet/fcn_hr18s_512x1024_160k_cityscapes.py --cfg-options checkpoint_config.max_keep_ckpts=1 dist_params.port=24728 --work-dir work_dirs/hrnet/fcn_hr18s_512x1024_160k_cityscapes >/dev/null & +echo 'configs/hrnet/fcn_hr48_512x512_160k_ade20k.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 ./tools/slurm_train.sh $PARTITION fcn_hr48_512x512_160k_ade20k configs/hrnet/fcn_hr48_512x512_160k_ade20k.py --cfg-options checkpoint_config.max_keep_ckpts=1 dist_params.port=24729 --work-dir work_dirs/hrnet/fcn_hr48_512x512_160k_ade20k >/dev/null & +echo 'configs/hrnet/fcn_hr48_512x1024_160k_cityscapes.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 ./tools/slurm_train.sh $PARTITION fcn_hr48_512x1024_160k_cityscapes configs/hrnet/fcn_hr48_512x1024_160k_cityscapes.py --cfg-options checkpoint_config.max_keep_ckpts=1 dist_params.port=24730 --work-dir work_dirs/hrnet/fcn_hr48_512x1024_160k_cityscapes >/dev/null & +echo 'configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 ./tools/slurm_train.sh $PARTITION pspnet_r50-d8_512x1024_80k_cityscapes configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py --cfg-options checkpoint_config.max_keep_ckpts=1 dist_params.port=24731 --work-dir work_dirs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes >/dev/null & +echo 'configs/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 ./tools/slurm_train.sh $PARTITION pspnet_r101-d8_512x1024_80k_cityscapes configs/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py --cfg-options checkpoint_config.max_keep_ckpts=1 dist_params.port=24732 --work-dir work_dirs/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes >/dev/null & +echo 'configs/pspnet/pspnet_r101-d8_512x512_160k_ade20k.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 ./tools/slurm_train.sh $PARTITION pspnet_r101-d8_512x512_160k_ade20k configs/pspnet/pspnet_r101-d8_512x512_160k_ade20k.py --cfg-options checkpoint_config.max_keep_ckpts=1 dist_params.port=24733 --work-dir work_dirs/pspnet/pspnet_r101-d8_512x512_160k_ade20k >/dev/null & +echo 'configs/pspnet/pspnet_r50-d8_512x512_160k_ade20k.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 ./tools/slurm_train.sh $PARTITION pspnet_r50-d8_512x512_160k_ade20k configs/pspnet/pspnet_r50-d8_512x512_160k_ade20k.py --cfg-options checkpoint_config.max_keep_ckpts=1 dist_params.port=24734 --work-dir work_dirs/pspnet/pspnet_r50-d8_512x512_160k_ade20k >/dev/null & +echo 'configs/resnest/pspnet_s101-d8_512x512_160k_ade20k.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 ./tools/slurm_train.sh $PARTITION pspnet_s101-d8_512x512_160k_ade20k configs/resnest/pspnet_s101-d8_512x512_160k_ade20k.py --cfg-options checkpoint_config.max_keep_ckpts=1 dist_params.port=24735 --work-dir work_dirs/resnest/pspnet_s101-d8_512x512_160k_ade20k 
>/dev/null & +echo 'configs/resnest/pspnet_s101-d8_512x1024_80k_cityscapes.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 ./tools/slurm_train.sh $PARTITION pspnet_s101-d8_512x1024_80k_cityscapes configs/resnest/pspnet_s101-d8_512x1024_80k_cityscapes.py --cfg-options checkpoint_config.max_keep_ckpts=1 dist_params.port=24736 --work-dir work_dirs/resnest/pspnet_s101-d8_512x1024_80k_cityscapes >/dev/null & +echo 'configs/fastscnn/fast_scnn_lr0.12_8x4_160k_cityscapes.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 ./tools/slurm_train.sh $PARTITION fast_scnn_lr0.12_8x4_160k_cityscapes configs/fastscnn/fast_scnn_lr0.12_8x4_160k_cityscapes.py --cfg-options checkpoint_config.max_keep_ckpts=1 dist_params.port=24737 --work-dir work_dirs/fastscnn/fast_scnn_lr0.12_8x4_160k_cityscapes >/dev/null & +echo 'configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 ./tools/slurm_train.sh $PARTITION deeplabv3plus_r101-d8_769x769_80k_cityscapes configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes.py --cfg-options checkpoint_config.max_keep_ckpts=1 dist_params.port=24738 --work-dir work_dirs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes >/dev/null & +echo 'configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 ./tools/slurm_train.sh $PARTITION deeplabv3plus_r101-d8_512x1024_80k_cityscapes configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py --cfg-options checkpoint_config.max_keep_ckpts=1 dist_params.port=24739 --work-dir work_dirs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes >/dev/null & +echo 'configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 ./tools/slurm_train.sh $PARTITION deeplabv3plus_r50-d8_512x1024_80k_cityscapes configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py --cfg-options checkpoint_config.max_keep_ckpts=1 dist_params.port=24740 --work-dir work_dirs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes >/dev/null & +echo 'configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 ./tools/slurm_train.sh $PARTITION deeplabv3plus_r50-d8_769x769_80k_cityscapes configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes.py --cfg-options checkpoint_config.max_keep_ckpts=1 dist_params.port=24741 --work-dir work_dirs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes >/dev/null & +echo 'configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py' & +GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=2 ./tools/slurm_train.sh $PARTITION upernet_vit-b16_ln_mln_512x512_160k_ade20k configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py --cfg-options checkpoint_config.max_keep_ckpts=1 dist_params.port=24742 --work-dir work_dirs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k >/dev/null & +echo 'configs/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k.py' & +GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=2 ./tools/slurm_train.sh $PARTITION upernet_deit-s16_ln_mln_512x512_160k_ade20k configs/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k.py --cfg-options checkpoint_config.max_keep_ckpts=1 dist_params.port=24743 --work-dir work_dirs/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k >/dev/null & +echo 'configs/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes.py' & +GPUS=4 GPUS_PER_NODE=4 CPUS_PER_TASK=2 ./tools/slurm_train.sh $PARTITION deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes 
configs/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes.py --cfg-options checkpoint_config.max_keep_ckpts=1 dist_params.port=24744 --work-dir work_dirs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_fp16_cityscapes >/dev/null & +echo 'configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py' & +GPUS=8 GPUS_PER_NODE=8 CPUS_PER_TASK=2 ./tools/slurm_train.sh $PARTITION upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py --cfg-options checkpoint_config.max_keep_ckpts=1 dist_params.port=24745 --work-dir work_dirs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K >/dev/null & diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/check_urls.py b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/check_urls.py new file mode 100644 index 0000000..42b6474 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/check_urls.py @@ -0,0 +1,101 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import logging +import os +from argparse import ArgumentParser + +import requests +import yaml as yml + +from mmseg.utils import get_root_logger + + +def check_url(url): + """Check url response status. + + Args: + url (str): url needed to check. + + Returns: + int, bool: status code and check flag. + """ + flag = True + r = requests.head(url) + status_code = r.status_code + if status_code == 403 or status_code == 404: + flag = False + + return status_code, flag + + +def parse_args(): + parser = ArgumentParser('url valid check.') + parser.add_argument( + '-m', + '--model-name', + type=str, + help='Select the model needed to check') + + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + model_name = args.model_name + + # yml path generate. + # If model_name is not set, script will check all of the models. + if model_name is not None: + yml_list = [(model_name, f'configs/{model_name}/{model_name}.yml')] + else: + # check all + yml_list = [(x, f'configs/{x}/{x}.yml') for x in os.listdir('configs/') + if x != '_base_'] + + logger = get_root_logger(log_file='url_check.log', log_level=logging.ERROR) + + for model_name, yml_path in yml_list: + # Default yaml loader unsafe. 
+ model_infos = yml.load( + open(yml_path, 'r'), Loader=yml.CLoader)['Models'] + for model_info in model_infos: + config_name = model_info['Name'] + checkpoint_url = model_info['Weights'] + # checkpoint url check + status_code, flag = check_url(checkpoint_url) + if flag: + logger.info(f'checkpoint | {config_name} | {checkpoint_url} | ' + f'{status_code} valid') + else: + logger.error( + f'checkpoint | {config_name} | {checkpoint_url} | ' + f'{status_code} | error') + # log_json check + checkpoint_name = checkpoint_url.split('/')[-1] + model_time = '-'.join(checkpoint_name.split('-')[:-1]).replace( + f'{config_name}_', '') + # two style of log_json name + # use '_' to link model_time (will be deprecated) + log_json_url_1 = f'https://download.openmmlab.com/mmsegmentation/v0.5/{model_name}/{config_name}/{config_name}_{model_time}.log.json' # noqa + status_code_1, flag_1 = check_url(log_json_url_1) + # use '-' to link model_time + log_json_url_2 = f'https://download.openmmlab.com/mmsegmentation/v0.5/{model_name}/{config_name}/{config_name}-{model_time}.log.json' # noqa + status_code_2, flag_2 = check_url(log_json_url_2) + if flag_1 or flag_2: + if flag_1: + logger.info( + f'log.json | {config_name} | {log_json_url_1} | ' + f'{status_code_1} | valid') + else: + logger.info( + f'log.json | {config_name} | {log_json_url_2} | ' + f'{status_code_2} | valid') + else: + logger.error( + f'log.json | {config_name} | {log_json_url_1} & ' + f'{log_json_url_2} | {status_code_1} & {status_code_2} | ' + 'error') + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/gather_benchmark_evaluation_results.py b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/gather_benchmark_evaluation_results.py new file mode 100644 index 0000000..47b557a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/gather_benchmark_evaluation_results.py @@ -0,0 +1,91 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import argparse +import glob +import os.path as osp + +import mmcv +from mmcv import Config + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Gather benchmarked model evaluation results') + parser.add_argument('config', help='test config file path') + parser.add_argument( + 'root', + type=str, + help='root path of benchmarked models to be gathered') + parser.add_argument( + '--out', + type=str, + default='benchmark_evaluation_info.json', + help='output path of gathered metrics and compared ' + 'results to be stored') + + args = parser.parse_args() + return args + + +if __name__ == '__main__': + args = parse_args() + + root_path = args.root + metrics_out = args.out + result_dict = {} + + cfg = Config.fromfile(args.config) + + for model_key in cfg: + model_infos = cfg[model_key] + if not isinstance(model_infos, list): + model_infos = [model_infos] + for model_info in model_infos: + previous_metrics = model_info['metric'] + config = model_info['config'].strip() + fname, _ = osp.splitext(osp.basename(config)) + + # Load benchmark evaluation json + metric_json_dir = osp.join(root_path, fname) + if not osp.exists(metric_json_dir): + print(f'{metric_json_dir} not existed.') + continue + + json_list = glob.glob(osp.join(metric_json_dir, '*.json')) + if len(json_list) == 0: + print(f'There is no eval json in {metric_json_dir}.') + continue + + log_json_path = list(sorted(json_list))[-1] + metric = mmcv.load(log_json_path) + if config not in metric.get('config', {}): + print(f'{config} not included in {log_json_path}') + continue + + # Compare between new benchmark results and previous metrics + differential_results = dict() + new_metrics = dict() + for record_metric_key in previous_metrics: + if record_metric_key not in metric['metric']: + raise KeyError('record_metric_key not exist, please ' + 'check your config') + old_metric = previous_metrics[record_metric_key] + new_metric = round(metric['metric'][record_metric_key] * 100, + 2) + + differential = new_metric - old_metric + flag = '+' if differential > 0 else '-' + differential_results[ + record_metric_key] = f'{flag}{abs(differential):.2f}' + new_metrics[record_metric_key] = new_metric + + result_dict[config] = dict( + differential=differential_results, + previous=previous_metrics, + new=new_metrics) + + if metrics_out: + mmcv.dump(result_dict, metrics_out, indent=4) + print('===================================') + for config_name, metrics in result_dict.items(): + print(config_name, metrics) + print('===================================') diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/gather_benchmark_train_results.py b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/gather_benchmark_train_results.py new file mode 100644 index 0000000..8aff2c4 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/gather_benchmark_train_results.py @@ -0,0 +1,100 @@ +import argparse +import glob +import os.path as osp + +import mmcv +from gather_models import get_final_results +from mmcv import Config + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Gather benchmarked models train results') + parser.add_argument('config', help='test config file path') + parser.add_argument( + 'root', + type=str, + help='root path of benchmarked models to be gathered') + parser.add_argument( + '--out', + type=str, + default='benchmark_train_info.json', + help='output path of gathered metrics to be stored') + + args = parser.parse_args() + return args + + +if __name__ == '__main__': + args = 
parse_args() + + root_path = args.root + metrics_out = args.out + + evaluation_cfg = Config.fromfile(args.config) + + result_dict = {} + for model_key in evaluation_cfg: + model_infos = evaluation_cfg[model_key] + if not isinstance(model_infos, list): + model_infos = [model_infos] + for model_info in model_infos: + config = model_info['config'] + + # benchmark train dir + model_name = osp.split(osp.dirname(config))[1] + config_name = osp.splitext(osp.basename(config))[0] + exp_dir = osp.join(root_path, model_name, config_name) + if not osp.exists(exp_dir): + print(f'{config} hasn\'t {exp_dir}') + continue + + # parse config + cfg = mmcv.Config.fromfile(config) + total_iters = cfg.runner.max_iters + exp_metric = cfg.evaluation.metric + if not isinstance(exp_metric, list): + exp_metrics = [exp_metric] + + # determine whether total_iters ckpt exists + ckpt_path = f'iter_{total_iters}.pth' + if not osp.exists(osp.join(exp_dir, ckpt_path)): + print(f'{config} hasn\'t {ckpt_path}') + continue + + # only the last log json counts + log_json_path = list( + sorted(glob.glob(osp.join(exp_dir, '*.log.json'))))[-1] + + # extract metric value + model_performance = get_final_results(log_json_path, total_iters) + if model_performance is None: + print(f'log file error: {log_json_path}') + continue + + differential_results = dict() + old_results = dict() + new_results = dict() + for metric_key in model_performance: + if metric_key in ['mIoU']: + metric = round(model_performance[metric_key] * 100, 2) + old_metric = model_info['metric'][metric_key] + old_results[metric_key] = old_metric + new_results[metric_key] = metric + differential = metric - old_metric + flag = '+' if differential > 0 else '-' + differential_results[ + metric_key] = f'{flag}{abs(differential):.2f}' + result_dict[config] = dict( + differential_results=differential_results, + old_results=old_results, + new_results=new_results, + ) + + # 4 save or print results + if metrics_out: + mmcv.dump(result_dict, metrics_out, indent=4) + print('===================================') + for config_name, metrics in result_dict.items(): + print(config_name, metrics) + print('===================================') diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/gather_models.py b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/gather_models.py new file mode 100644 index 0000000..3eedf61 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/gather_models.py @@ -0,0 +1,211 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import glob +import hashlib +import json +import os +import os.path as osp +import shutil + +import mmcv +import torch + +# build schedule look-up table to automatically find the final model +RESULTS_LUT = ['mIoU', 'mAcc', 'aAcc'] + + +def calculate_file_sha256(file_path): + """calculate file sha256 hash code.""" + with open(file_path, 'rb') as fp: + sha256_cal = hashlib.sha256() + sha256_cal.update(fp.read()) + return sha256_cal.hexdigest() + + +def process_checkpoint(in_file, out_file): + checkpoint = torch.load(in_file, map_location='cpu') + # remove optimizer for smaller file size + if 'optimizer' in checkpoint: + del checkpoint['optimizer'] + # if it is necessary to remove some sensitive data in checkpoint['meta'], + # add the code here. + torch.save(checkpoint, out_file) + # The hash code calculation and rename command differ on different system + # platform. 
+ sha = calculate_file_sha256(out_file) + final_file = out_file.rstrip('.pth') + '-{}.pth'.format(sha[:8]) + os.rename(out_file, final_file) + + # Remove prefix and suffix + final_file_name = osp.split(final_file)[1] + final_file_name = osp.splitext(final_file_name)[0] + + return final_file_name + + +def get_final_iter(config): + iter_num = config.split('_')[-2] + assert iter_num.endswith('k') + return int(iter_num[:-1]) * 1000 + + +def get_final_results(log_json_path, iter_num): + result_dict = dict() + last_iter = 0 + with open(log_json_path, 'r') as f: + for line in f.readlines(): + log_line = json.loads(line) + if 'mode' not in log_line.keys(): + continue + + # When evaluation, the 'iter' of new log json is the evaluation + # steps on single gpu. + flag1 = ('aAcc' in log_line) or (log_line['mode'] == 'val') + flag2 = (last_iter == iter_num - 50) or (last_iter == iter_num) + if flag1 and flag2: + result_dict.update({ + key: log_line[key] + for key in RESULTS_LUT if key in log_line + }) + return result_dict + + last_iter = log_line['iter'] + + +def parse_args(): + parser = argparse.ArgumentParser(description='Gather benchmarked models') + parser.add_argument( + '-f', '--config-name', type=str, help='Process the selected config.') + parser.add_argument( + '-w', + '--work-dir', + default='work_dirs/', + type=str, + help='Ckpt storage root folder of benchmarked models to be gathered.') + parser.add_argument( + '-c', + '--collect-dir', + default='work_dirs/gather', + type=str, + help='Ckpt collect root folder of gathered models.') + parser.add_argument( + '--all', action='store_true', help='whether include .py and .log') + + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + work_dir = args.work_dir + collect_dir = args.collect_dir + selected_config_name = args.config_name + mmcv.mkdir_or_exist(collect_dir) + + # find all models in the root directory to be gathered + raw_configs = list(mmcv.scandir('./configs', '.py', recursive=True)) + + # filter configs that is not trained in the experiments dir + used_configs = [] + for raw_config in raw_configs: + config_name = osp.splitext(osp.basename(raw_config))[0] + if osp.exists(osp.join(work_dir, config_name)): + if (selected_config_name is None + or selected_config_name == config_name): + used_configs.append(raw_config) + print(f'Find {len(used_configs)} models to be gathered') + + # find final_ckpt and log file for trained each config + # and parse the best performance + model_infos = [] + for used_config in used_configs: + config_name = osp.splitext(osp.basename(used_config))[0] + exp_dir = osp.join(work_dir, config_name) + # check whether the exps is finished + final_iter = get_final_iter(used_config) + final_model = 'iter_{}.pth'.format(final_iter) + model_path = osp.join(exp_dir, final_model) + + # skip if the model is still training + if not osp.exists(model_path): + print(f'{used_config} train not finished yet') + continue + + # get logs + log_json_paths = glob.glob(osp.join(exp_dir, '*.log.json')) + log_json_path = log_json_paths[0] + model_performance = None + for idx, _log_json_path in enumerate(log_json_paths): + model_performance = get_final_results(_log_json_path, final_iter) + if model_performance is not None: + log_json_path = _log_json_path + break + + if model_performance is None: + print(f'{used_config} model_performance is None') + continue + + model_time = osp.split(log_json_path)[-1].split('.')[0] + model_infos.append( + dict( + config_name=config_name, + results=model_performance, + 
iters=final_iter,
+                model_time=model_time,
+                log_json_path=osp.split(log_json_path)[-1]))
+
+    # publish model for each checkpoint
+    publish_model_infos = []
+    for model in model_infos:
+        config_name = model['config_name']
+        model_publish_dir = osp.join(collect_dir, config_name)
+
+        publish_model_path = osp.join(model_publish_dir,
+                                      config_name + '_' + model['model_time'])
+        trained_model_path = osp.join(work_dir, config_name,
+                                      'iter_{}.pth'.format(model['iters']))
+        if osp.exists(model_publish_dir):
+            for file in os.listdir(model_publish_dir):
+                if file.endswith('.pth'):
+                    print(f'model {file} found')
+                    model['model_path'] = osp.abspath(
+                        osp.join(model_publish_dir, file))
+                    break
+            if 'model_path' not in model:
+                print(f'dir {model_publish_dir} exists, no model found')
+
+        else:
+            mmcv.mkdir_or_exist(model_publish_dir)
+
+            # convert model
+            final_model_path = process_checkpoint(trained_model_path,
+                                                  publish_model_path)
+            model['model_path'] = final_model_path
+
+        new_json_path = f'{config_name}_{model["log_json_path"]}'
+        # copy log
+        shutil.copy(
+            osp.join(work_dir, config_name, model['log_json_path']),
+            osp.join(model_publish_dir, new_json_path))
+
+        if args.all:
+            new_txt_path = new_json_path[:-len('.json')]
+            shutil.copy(
+                osp.join(work_dir, config_name,
+                         model['log_json_path'][:-len('.json')]),
+                osp.join(model_publish_dir, new_txt_path))
+
+        if args.all:
+            # copy config to guarantee reproducibility
+            raw_config = osp.join('./configs', f'{config_name}.py')
+            mmcv.Config.fromfile(raw_config).dump(
+                osp.join(model_publish_dir, osp.basename(raw_config)))
+
+        publish_model_infos.append(model)
+
+    models = dict(models=publish_model_infos)
+    mmcv.dump(models, osp.join(collect_dir, 'model_infos.json'), indent=4)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/generate_benchmark_evaluation_script.py b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/generate_benchmark_evaluation_script.py
new file mode 100644
index 0000000..d86e94b
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/generate_benchmark_evaluation_script.py
@@ -0,0 +1,114 @@
+# Copyright (c) OpenMMLab. All rights reserved.
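+#
+# This helper reads a benchmark config in which every entry carries `config`,
+# `checkpoint` and `eval` fields, and writes one `tools/slurm_test.sh`
+# command per model into a shell script, bumping the dist port for each job.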
+import argparse +import os.path as osp + +from mmcv import Config + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert benchmark test model list to script') + parser.add_argument('config', help='test config file path') + parser.add_argument('--port', type=int, default=28171, help='dist port') + parser.add_argument( + '--work-dir', + default='work_dirs/benchmark_evaluation', + help='the dir to save metric') + parser.add_argument( + '--out', + type=str, + default='.dev/benchmark_evaluation.sh', + help='path to save model benchmark script') + + args = parser.parse_args() + return args + + +def process_model_info(model_info, work_dir): + config = model_info['config'].strip() + fname, _ = osp.splitext(osp.basename(config)) + job_name = fname + checkpoint = model_info['checkpoint'].strip() + work_dir = osp.join(work_dir, fname) + if not isinstance(model_info['eval'], list): + evals = [model_info['eval']] + else: + evals = model_info['eval'] + eval = ' '.join(evals) + return dict( + config=config, + job_name=job_name, + checkpoint=checkpoint, + work_dir=work_dir, + eval=eval) + + +def create_test_bash_info(commands, model_test_dict, port, script_name, + partition): + config = model_test_dict['config'] + job_name = model_test_dict['job_name'] + checkpoint = model_test_dict['checkpoint'] + work_dir = model_test_dict['work_dir'] + eval = model_test_dict['eval'] + + echo_info = f'\necho \'{config}\' &' + commands.append(echo_info) + commands.append('\n') + + command_info = f'GPUS=4 GPUS_PER_NODE=4 ' \ + f'CPUS_PER_TASK=2 {script_name} ' + + command_info += f'{partition} ' + command_info += f'{job_name} ' + command_info += f'{config} ' + command_info += f'$CHECKPOINT_DIR/{checkpoint} ' + + command_info += f'--eval {eval} ' + command_info += f'--work-dir {work_dir} ' + command_info += f'--cfg-options dist_params.port={port} ' + command_info += '&' + + commands.append(command_info) + + +def main(): + args = parse_args() + if args.out: + out_suffix = args.out.split('.')[-1] + assert args.out.endswith('.sh'), \ + f'Expected out file path suffix is .sh, but get .{out_suffix}' + + commands = [] + partition_name = 'PARTITION=$1' + commands.append(partition_name) + commands.append('\n') + + checkpoint_root = 'CHECKPOINT_DIR=$2' + commands.append(checkpoint_root) + commands.append('\n') + + script_name = osp.join('tools', 'slurm_test.sh') + port = args.port + work_dir = args.work_dir + + cfg = Config.fromfile(args.config) + + for model_key in cfg: + model_infos = cfg[model_key] + if not isinstance(model_infos, list): + model_infos = [model_infos] + for model_info in model_infos: + print('processing: ', model_info['config']) + model_test_dict = process_model_info(model_info, work_dir) + create_test_bash_info(commands, model_test_dict, port, script_name, + '$PARTITION') + port += 1 + + command_str = ''.join(commands) + if args.out: + with open(args.out, 'w') as f: + f.write(command_str + '\n') + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/generate_benchmark_train_script.py b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/generate_benchmark_train_script.py new file mode 100644 index 0000000..6e8a0ae --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/generate_benchmark_train_script.py @@ -0,0 +1,91 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
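+#
+# This helper turns the txt list produced by the benchmark filter into a
+# shell script of `tools/slurm_train.sh` commands, one per config, again
+# bumping the dist port for each job so the runs do not collide.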
+import argparse +import os.path as osp + +# Default using 4 gpu when training +config_8gpu_list = [ + 'configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py', # noqa + 'configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py', + 'configs/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k.py', +] + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert benchmark model json to script') + parser.add_argument( + 'txt_path', type=str, help='txt path output by benchmark_filter') + parser.add_argument('--port', type=int, default=24727, help='dist port') + parser.add_argument( + '--out', + type=str, + default='.dev/benchmark_train.sh', + help='path to save model benchmark script') + + args = parser.parse_args() + return args + + +def create_train_bash_info(commands, config, script_name, partition, port): + cfg = config.strip() + + # print cfg name + echo_info = f'echo \'{cfg}\' &' + commands.append(echo_info) + commands.append('\n') + + _, model_name = osp.split(osp.dirname(cfg)) + config_name, _ = osp.splitext(osp.basename(cfg)) + # default setting + if cfg in config_8gpu_list: + command_info = f'GPUS=8 GPUS_PER_NODE=8 ' \ + f'CPUS_PER_TASK=2 {script_name} ' + else: + command_info = f'GPUS=4 GPUS_PER_NODE=4 ' \ + f'CPUS_PER_TASK=2 {script_name} ' + command_info += f'{partition} ' + command_info += f'{config_name} ' + command_info += f'{cfg} ' + command_info += f'--cfg-options ' \ + f'checkpoint_config.max_keep_ckpts=1 ' \ + f'dist_params.port={port} ' + command_info += f'--work-dir work_dirs/{model_name}/{config_name} ' + # Let the script shut up + command_info += '>/dev/null &' + + commands.append(command_info) + commands.append('\n') + + +def main(): + args = parse_args() + if args.out: + out_suffix = args.out.split('.')[-1] + assert args.out.endswith('.sh'), \ + f'Expected out file path suffix is .sh, but get .{out_suffix}' + + root_name = './tools' + script_name = osp.join(root_name, 'slurm_train.sh') + port = args.port + partition_name = 'PARTITION=$1' + + commands = [] + commands.append(partition_name) + commands.append('\n') + commands.append('\n') + + with open(args.txt_path, 'r') as f: + model_cfgs = f.readlines() + for i, cfg in enumerate(model_cfgs): + create_train_bash_info(commands, cfg, script_name, '$PARTITION', + port) + port += 1 + + command_str = ''.join(commands) + if args.out: + with open(args.out, 'w') as f: + f.write(command_str) + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/log_collector/example_config.py b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/log_collector/example_config.py new file mode 100644 index 0000000..bc2b4d6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/log_collector/example_config.py @@ -0,0 +1,18 @@ +work_dir = '../../work_dirs' +metric = 'mIoU' + +# specify the log files we would like to collect in `log_items` +log_items = [ + 'segformer_mit-b5_512x512_160k_ade20k_cnn_lr_with_warmup', + 'segformer_mit-b5_512x512_160k_ade20k_cnn_no_warmup_lr', + 'segformer_mit-b5_512x512_160k_ade20k_mit_trans_lr', + 'segformer_mit-b5_512x512_160k_ade20k_swin_trans_lr' +] +# or specify ignore_keywords, then the folders whose name contain +# `'segformer'` won't be collected +# ignore_keywords = ['segformer'] + +# should not include metric +other_info_keys = ['mAcc'] +markdown_file = 'markdowns/lr_in_trans.json.md' +json_file = 'jsons/trans_in_cnn.json' diff --git 
a/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/log_collector/log_collector.py b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/log_collector/log_collector.py new file mode 100644 index 0000000..d0f4080 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/log_collector/log_collector.py @@ -0,0 +1,143 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import datetime +import json +import os +import os.path as osp +from collections import OrderedDict + +from utils import load_config + +# automatically collect all the results + +# The structure of the directory: +# ├── work-dir +# │ ├── config_1 +# │ │ ├── time1.log.json +# │ │ ├── time2.log.json +# │ │ ├── time3.log.json +# │ │ ├── time4.log.json +# │ ├── config_2 +# │ │ ├── time5.log.json +# │ │ ├── time6.log.json +# │ │ ├── time7.log.json +# │ │ ├── time8.log.json + + +def parse_args(): + parser = argparse.ArgumentParser(description='extract info from log.json') + parser.add_argument('config_dir') + args = parser.parse_args() + return args + + +def has_keyword(name: str, keywords: list): + for a_keyword in keywords: + if a_keyword in name: + return True + return False + + +def main(): + args = parse_args() + cfg = load_config(args.config_dir) + work_dir = cfg['work_dir'] + metric = cfg['metric'] + log_items = cfg.get('log_items', []) + ignore_keywords = cfg.get('ignore_keywords', []) + other_info_keys = cfg.get('other_info_keys', []) + markdown_file = cfg.get('markdown_file', None) + json_file = cfg.get('json_file', None) + + if json_file and osp.split(json_file)[0] != '': + os.makedirs(osp.split(json_file)[0], exist_ok=True) + if markdown_file and osp.split(markdown_file)[0] != '': + os.makedirs(osp.split(markdown_file)[0], exist_ok=True) + + assert not (log_items and ignore_keywords), \ + 'log_items and ignore_keywords cannot be specified at the same time' + assert metric not in other_info_keys, \ + 'other_info_keys should not contain metric' + + if ignore_keywords and isinstance(ignore_keywords, str): + ignore_keywords = [ignore_keywords] + if other_info_keys and isinstance(other_info_keys, str): + other_info_keys = [other_info_keys] + if log_items and isinstance(log_items, str): + log_items = [log_items] + + if not log_items: + log_items = [ + item for item in sorted(os.listdir(work_dir)) + if not has_keyword(item, ignore_keywords) + ] + + experiment_info_list = [] + for config_dir in log_items: + preceding_path = os.path.join(work_dir, config_dir) + log_list = [ + item for item in os.listdir(preceding_path) + if item.endswith('.log.json') + ] + log_list = sorted( + log_list, + key=lambda time_str: datetime.datetime.strptime( + time_str, '%Y%m%d_%H%M%S.log.json')) + val_list = [] + last_iter = 0 + for log_name in log_list: + with open(os.path.join(preceding_path, log_name), 'r') as f: + # ignore the info line + f.readline() + all_lines = f.readlines() + val_list.extend([ + json.loads(line) for line in all_lines + if json.loads(line)['mode'] == 'val' + ]) + for index in range(len(all_lines) - 1, -1, -1): + line_dict = json.loads(all_lines[index]) + if line_dict['mode'] == 'train': + last_iter = max(last_iter, line_dict['iter']) + break + + new_log_dict = dict( + method=config_dir, metric_used=metric, last_iter=last_iter) + for index, log in enumerate(val_list, 1): + new_ordered_dict = OrderedDict() + new_ordered_dict['eval_index'] = index + new_ordered_dict[metric] = log[metric] + for key in other_info_keys: + if key in log: + new_ordered_dict[key] = log[key] + val_list[index - 1] = 
new_ordered_dict
+
+        assert len(val_list) >= 1, \
+            f"work dir {config_dir} doesn't contain any evaluation."
+        new_log_dict['last eval'] = val_list[-1]
+        new_log_dict['best eval'] = max(val_list, key=lambda x: x[metric])
+        experiment_info_list.append(new_log_dict)
+        print(f'{config_dir} is processed')
+
+    if json_file:
+        with open(json_file, 'w') as f:
+            json.dump(experiment_info_list, f, indent=4)
+
+    if markdown_file:
+        lines_to_write = []
+        for index, log in enumerate(experiment_info_list, 1):
+            lines_to_write.append(
+                f"|{index}|{log['method']}|{log['best eval'][metric]}"
+                f"|{log['best eval']['eval_index']}|"
+                f"{log['last eval'][metric]}|"
+                f"{log['last eval']['eval_index']}|{log['last_iter']}|\n")
+        with open(markdown_file, 'w') as f:
+            f.write(f'|exp_num|method|{metric} best|best index|'
+                    f'{metric} last|last index|last iter num|\n')
+            f.write('|:---:|:---:|:---:|:---:|:---:|:---:|:---:|\n')
+            f.writelines(lines_to_write)
+
+    print('processed successfully')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/log_collector/readme.md b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/log_collector/readme.md
new file mode 100644
index 0000000..4a8b9b6
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/log_collector/readme.md
@@ -0,0 +1,144 @@
+# Log Collector
+
+## Function
+
+Automatically collect logs and write the result to a json file or a markdown file.
+
+If there are several `.log.json` files in one folder, Log Collector assumes that each `.log.json` file after the first resumes training from the preceding one, and it returns a result that takes all of the `.log.json` files into account.
+
+## Usage
+
+To use the log collector, you first need to write a config file that configures it.
+
+For example:
+
+example_config.py:
+
+```python
+# The work directory that contains folders that contain .log.json files.
+work_dir = '../../work_dirs'
+# The metric used to find the best evaluation.
+metric = 'mIoU'
+
+# **Don't specify log_items and ignore_keywords at the same time.**
+# Specify the log files we would like to collect in `log_items`.
+# The folders specified should be subdirectories of `work_dir`.
+log_items = [
+    'segformer_mit-b5_512x512_160k_ade20k_cnn_lr_with_warmup',
+    'segformer_mit-b5_512x512_160k_ade20k_cnn_no_warmup_lr',
+    'segformer_mit-b5_512x512_160k_ade20k_mit_trans_lr',
+    'segformer_mit-b5_512x512_160k_ade20k_swin_trans_lr'
+]
+# Or specify `ignore_keywords`. Folders whose names contain one
+# of the keywords in the `ignore_keywords` list (e.g., `'segformer'`)
+# won't be collected.
+# ignore_keywords = ['segformer']
+
+# Other log items in .log.json that you want to collect.
+# Should not include `metric`.
+other_info_keys = ["mAcc"]
+# The output markdown file's name.
+markdown_file = 'markdowns/lr_in_trans.json.md'
+# The output json file's name. (optional)
+json_file = 'jsons/trans_in_cnn.json'
+```
+
+The structure of the work directory should look like this:
+
+```text
+├── work-dir
+│   ├── folder1
+│   │   ├── time1.log.json
+│   │   ├── time2.log.json
+│   │   ├── time3.log.json
+│   │   ├── time4.log.json
+│   ├── folder2
+│   │   ├── time5.log.json
+│   │   ├── time6.log.json
+│   │   ├── time7.log.json
+│   │   ├── time8.log.json
+```
+
+Then, cd to the log collector folder.
+ +Now you can run log_collector.py by using command: + +```bash +python log_collector.py ./example_config.py +``` + +The output markdown file is like: + +| exp_num | method | mIoU best | best index | mIoU last | last index | last iter num | +| :-----: | :-----------------------------------------------------: | :-------: | :--------: | :-------: | :--------: | :-----------: | +| 1 | segformer_mit-b5_512x512_160k_ade20k_cnn_lr_with_warmup | 0.2776 | 10 | 0.2776 | 10 | 160000 | +| 2 | segformer_mit-b5_512x512_160k_ade20k_cnn_no_warmup_lr | 0.2802 | 10 | 0.2802 | 10 | 160000 | +| 3 | segformer_mit-b5_512x512_160k_ade20k_mit_trans_lr | 0.4943 | 11 | 0.4943 | 11 | 160000 | +| 4 | segformer_mit-b5_512x512_160k_ade20k_swin_trans_lr | 0.4883 | 11 | 0.4883 | 11 | 160000 | + +The output json file is like: + +```json +[ + { + "method": "segformer_mit-b5_512x512_160k_ade20k_cnn_lr_with_warmup", + "metric_used": "mIoU", + "last_iter": 160000, + "last eval": { + "eval_index": 10, + "mIoU": 0.2776, + "mAcc": 0.3779 + }, + "best eval": { + "eval_index": 10, + "mIoU": 0.2776, + "mAcc": 0.3779 + } + }, + { + "method": "segformer_mit-b5_512x512_160k_ade20k_cnn_no_warmup_lr", + "metric_used": "mIoU", + "last_iter": 160000, + "last eval": { + "eval_index": 10, + "mIoU": 0.2802, + "mAcc": 0.3764 + }, + "best eval": { + "eval_index": 10, + "mIoU": 0.2802, + "mAcc": 0.3764 + } + }, + { + "method": "segformer_mit-b5_512x512_160k_ade20k_mit_trans_lr", + "metric_used": "mIoU", + "last_iter": 160000, + "last eval": { + "eval_index": 11, + "mIoU": 0.4943, + "mAcc": 0.6097 + }, + "best eval": { + "eval_index": 11, + "mIoU": 0.4943, + "mAcc": 0.6097 + } + }, + { + "method": "segformer_mit-b5_512x512_160k_ade20k_swin_trans_lr", + "metric_used": "mIoU", + "last_iter": 160000, + "last eval": { + "eval_index": 11, + "mIoU": 0.4883, + "mAcc": 0.6061 + }, + "best eval": { + "eval_index": 11, + "mIoU": 0.4883, + "mAcc": 0.6061 + } + } +] +``` diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/log_collector/utils.py b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/log_collector/utils.py new file mode 100644 index 0000000..848516a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/log_collector/utils.py @@ -0,0 +1,20 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# modified from https://github.dev/open-mmlab/mmcv +import os.path as osp +import sys +from importlib import import_module + + +def load_config(cfg_dir: str) -> dict: + assert cfg_dir.endswith('.py') + root_path, file_name = osp.split(cfg_dir) + temp_module = osp.splitext(file_name)[0] + sys.path.insert(0, root_path) + mod = import_module(temp_module) + sys.path.pop(0) + cfg_dict = { + k: v + for k, v in mod.__dict__.items() if not k.startswith('__') + } + del sys.modules[temp_module] + return cfg_dict diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/md2yml.py b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/md2yml.py new file mode 100644 index 0000000..1d68498 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/.dev/md2yml.py @@ -0,0 +1,317 @@ +#!/usr/bin/env python + +# Copyright (c) OpenMMLab. All rights reserved. +# This tool is used to update model-index.yml which is required by MIM, and +# will be automatically called as a pre-commit hook. The updating will be +# triggered if any change of model information (.md files in configs/) has been +# detected before a commit. 
+ +import glob +import os +import os.path as osp +import re +import sys + +from lxml import etree +from mmcv.fileio import dump + +MMSEG_ROOT = osp.dirname(osp.dirname((osp.dirname(__file__)))) + +COLLECTIONS = [ + 'ANN', 'APCNet', 'BiSeNetV1', 'BiSeNetV2', 'CCNet', 'CGNet', 'DANet', + 'DeepLabV3', 'DeepLabV3+', 'DMNet', 'DNLNet', 'DPT', 'EMANet', 'EncNet', + 'ERFNet', 'FastFCN', 'FastSCNN', 'FCN', 'GCNet', 'ICNet', 'ISANet', 'KNet', + 'NonLocalNet', 'OCRNet', 'PointRend', 'PSANet', 'PSPNet', 'Segformer', + 'Segmenter', 'FPN', 'SETR', 'STDC', 'UNet', 'UPerNet' +] +COLLECTIONS_TEMP = [] + + +def dump_yaml_and_check_difference(obj, filename, sort_keys=False): + """Dump object to a yaml file, and check if the file content is different + from the original. + + Args: + obj (any): The python object to be dumped. + filename (str): YAML filename to dump the object to. + sort_keys (str); Sort key by dictionary order. + Returns: + Bool: If the target YAML file is different from the original. + """ + + str_dump = dump(obj, None, file_format='yaml', sort_keys=sort_keys) + if osp.isfile(filename): + file_exists = True + with open(filename, 'r', encoding='utf-8') as f: + str_orig = f.read() + else: + file_exists = False + str_orig = None + + if file_exists and str_orig == str_dump: + is_different = False + else: + is_different = True + with open(filename, 'w', encoding='utf-8') as f: + f.write(str_dump) + + return is_different + + +def parse_md(md_file): + """Parse .md file and convert it to a .yml file which can be used for MIM. + + Args: + md_file (str): Path to .md file. + Returns: + Bool: If the target YAML file is different from the original. + """ + collection_name = osp.split(osp.dirname(md_file))[1] + configs = os.listdir(osp.dirname(md_file)) + + collection = dict( + Name=collection_name, + Metadata={'Training Data': []}, + Paper={ + 'URL': '', + 'Title': '' + }, + README=md_file, + Code={ + 'URL': '', + 'Version': '' + }) + collection.update({'Converted From': {'Weights': '', 'Code': ''}}) + models = [] + datasets = [] + paper_url = None + paper_title = None + code_url = None + code_version = None + repo_url = None + + # To avoid re-counting number of backbone model in OpenMMLab, + # if certain model in configs folder is backbone whose name is already + # recorded in MMClassification, then the `COLLECTION` dict of this model + # in MMSegmentation should be deleted, and `In Collection` in `Models` + # should be set with head or neck of this config file. + is_backbone = None + + with open(md_file, 'r', encoding='UTF-8') as md: + lines = md.readlines() + i = 0 + current_dataset = '' + while i < len(lines): + line = lines[i].strip() + # In latest README.md the title and url are in the third line. + if i == 2: + paper_url = lines[i].split('](')[1].split(')')[0] + paper_title = lines[i].split('](')[0].split('[')[1] + if len(line) == 0: + i += 1 + continue + elif line[:3] == ' Before you create a PR, make sure that your code lints and is formatted by yapf. + +### C++ and CUDA + +We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html). 
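For reference, the lint checks mentioned above are wired up through pre-commit in this repository (see the `.pre-commit-config.yaml` and the `lint.yml` workflow added later in this diff); a typical local run, assuming `pre-commit` is available, looks like:

```bash
pip install pre-commit
pre-commit install          # register the git hook once per clone
pre-commit run --all-files  # flake8, isort, yapf, mdformat, codespell, ...
```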
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/.github/ISSUE_TEMPLATE/config.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/.github/ISSUE_TEMPLATE/config.yml
new file mode 100644
index 0000000..aa982e5
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,6 @@
+blank_issues_enabled: false
+
+contact_links:
+  - name: MMSegmentation Documentation
+    url: https://mmsegmentation.readthedocs.io
+    about: Check the docs and FAQ to see if your question is already answered.
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/.github/ISSUE_TEMPLATE/error-report.md b/prediction/image/mx15hdi/Detect/mmsegmentation/.github/ISSUE_TEMPLATE/error-report.md
new file mode 100644
index 0000000..807781c
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/.github/ISSUE_TEMPLATE/error-report.md
@@ -0,0 +1,48 @@
+---
+name: Error report
+about: Create a report to help us improve
+title: ''
+labels: ''
+assignees: ''
+---
+
+Thanks for your error report; we appreciate it a lot.
+
+**Checklist**
+
+1. I have searched related issues but cannot get the expected help.
+2. The bug has not been fixed in the latest version.
+
+**Describe the bug**
+A clear and concise description of what the bug is.
+
+**Reproduction**
+
+1. What command or script did you run?
+
+   ```none
+   A placeholder for the command.
+   ```
+
+2. Did you make any modifications to the code or config? Do you understand what you modified?
+
+3. What dataset did you use?
+
+**Environment**
+
+1. Please run `python mmseg/utils/collect_env.py` to collect the necessary environment information and paste it here.
+2. You may add additional information that may be helpful for locating the problem, such as
+   - How you installed PyTorch \[e.g., pip, conda, source\]
+   - Other environment variables that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.)
+
+**Error traceback**
+
+If applicable, paste the error traceback here.
+
+```none
+A placeholder for the traceback.
+```
+
+**Bug fix**
+
+If you have already identified the reason, you can provide the information here. If you are willing to create a PR to fix it, please also leave a comment here; that would be much appreciated!
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/.github/ISSUE_TEMPLATE/feature_request.md b/prediction/image/mx15hdi/Detect/mmsegmentation/.github/ISSUE_TEMPLATE/feature_request.md
new file mode 100644
index 0000000..7e3b855
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,21 @@
+---
+name: Feature request
+about: Suggest an idea for this project
+title: ''
+labels: ''
+assignees: ''
+---
+
+# Describe the feature
+
+**Motivation**
+A clear and concise description of the motivation for the feature.
+Ex1. It is inconvenient when \[....\].
+Ex2. There is a recent paper \[....\], which is very helpful for \[....\].
+
+**Related resources**
+If there is an official code release or third-party implementations, please also provide the information here, which would be very helpful.
+
+**Additional context**
+Add any other context or screenshots about the feature request here.
+If you would like to implement the feature and create a PR, please leave a comment here; that would be much appreciated.
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/.github/ISSUE_TEMPLATE/general_questions.md b/prediction/image/mx15hdi/Detect/mmsegmentation/.github/ISSUE_TEMPLATE/general_questions.md
new file mode 100644
index 0000000..f02dd63
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/.github/ISSUE_TEMPLATE/general_questions.md
@@ -0,0 +1,7 @@
+---
+name: General questions
+about: Ask general questions to get help
+title: ''
+labels: ''
+assignees: ''
+---
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/.github/ISSUE_TEMPLATE/reimplementation_questions.md b/prediction/image/mx15hdi/Detect/mmsegmentation/.github/ISSUE_TEMPLATE/reimplementation_questions.md
new file mode 100644
index 0000000..63e4c3b
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/.github/ISSUE_TEMPLATE/reimplementation_questions.md
@@ -0,0 +1,69 @@
+---
+name: Reimplementation Questions
+about: Ask about questions during model reimplementation
+title: ''
+labels: reimplementation
+assignees: ''
+---
+
+If you feel we have helped you, give us a STAR! :satisfied:
+
+**Notice**
+
+Reimplementation issues usually fall into one of the following situations:
+
+1. Reimplementing a model in the model zoo using the provided configs
+2. Reimplementing a model in the model zoo on other datasets (e.g., custom datasets)
+3. Reimplementing a custom model whose components are all implemented in MMSegmentation
+4. Reimplementing a custom model with new modules implemented by yourself
+
+Each case calls for different steps:
+
+- For cases 1 & 3, please follow the steps in the following sections so we can quickly identify the issue.
+- For cases 2 & 4, please understand that we cannot offer much help here, because we usually do not know the full code, and the users should be responsible for the code they write.
+- One suggestion for cases 2 & 4 is to first check whether the bug lies in the self-implemented code or in the original code. For example, first make sure that the same model runs well on supported datasets. If you still need help, describe what you have done and what you obtained in the issue, follow the steps in the following sections, and be as clear as possible so that we can better help you.
+
+**Checklist**
+
+1. I have searched related issues but cannot get the expected help.
+2. The issue has not been fixed in the latest version.
+
+**Describe the issue**
+
+A clear and concise description of the problem you encountered and what you have done.
+
+**Reproduction**
+
+1. What command or script did you run?
+
+```
+A placeholder for the command.
+```
+
+2. Which config did you run?
+
+```
+A placeholder for the config.
+```
+
+3. Did you make any modifications to the code or config? Do you understand what you modified?
+4. What dataset did you use?
+
+**Environment**
+
+1. Please run `PYTHONPATH=${PWD}:$PYTHONPATH python mmseg/utils/collect_env.py` to collect the necessary environment information and paste it here.
+2. You may add additional information that may be helpful for locating the problem, such as
+   1. How you installed PyTorch \[e.g., pip, conda, source\]
+   2. Other environment variables that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.)
+
+**Results**
+
+If applicable, paste the related results here, e.g., what you expected and what you got.
+
+```
+A placeholder for results comparison
+```
+
+**Issue fix**
+
+If you have already identified the reason, you can provide the information here. If you are willing to create a PR to fix it, please also leave a comment here; that would be much appreciated!
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/.github/pull_request_template.md b/prediction/image/mx15hdi/Detect/mmsegmentation/.github/pull_request_template.md
new file mode 100644
index 0000000..09d5305
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/.github/pull_request_template.md
@@ -0,0 +1,25 @@
+Thanks for your contribution; we appreciate it a lot. The following instructions will make your pull request healthier and help it get feedback more easily. If you do not understand some of the items, don't worry: just make the pull request and seek help from the maintainers.
+
+## Motivation
+
+Please describe the motivation for this PR and the goal you want to achieve through it.
+
+## Modification
+
+Please briefly describe what modifications are made in this PR.
+
+## BC-breaking (Optional)
+
+Does the modification introduce changes that break the backward compatibility of downstream repos?
+If so, please describe how it breaks the compatibility and how downstream projects should modify their code to stay compatible with this PR.
+
+## Use cases (Optional)
+
+If this PR introduces a new feature, it is better to list some use cases here and update the documentation.
+
+## Checklist
+
+1. Pre-commit or other linting tools are used to fix potential lint issues.
+2. The modification is covered by complete unit tests. If not, please add more unit tests to ensure correctness.
+3. If the modification could affect downstream projects, this PR should be tested with them, e.g., MMDet or MMDet3D.
+4. The documentation has been modified accordingly, e.g., docstrings or example tutorials.
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/.github/workflows/build.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/.github/workflows/build.yml new file mode 100644 index 0000000..be26581 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/.github/workflows/build.yml @@ -0,0 +1,257 @@ +name: build + +on: + push: + paths-ignore: + - 'demo/**' + - '.dev/**' + - 'docker/**' + - 'tools/**' + - '**.md' + + pull_request: + paths-ignore: + - 'demo/**' + - '.dev/**' + - 'docker/**' + - 'tools/**' + - 'docs/**' + - '**.md' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + build_cpu: + runs-on: ubuntu-18.04 + strategy: + matrix: + python-version: [3.7] + torch: [1.5.1, 1.6.0, 1.7.0, 1.8.0, 1.9.0] + include: + - torch: 1.5.1 + torch_version: torch1.5 + torchvision: 0.6.1 + - torch: 1.6.0 + torch_version: torch1.6 + torchvision: 0.7.0 + - torch: 1.7.0 + torch_version: torch1.7 + torchvision: 0.8.1 + - torch: 1.8.0 + torch_version: torch1.8 + torchvision: 0.9.0 + - torch: 1.9.0 + torch_version: torch1.9 + torchvision: 0.10.0 + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Upgrade pip + run: pip install pip --upgrade + - name: Install PyTorch + run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html + - name: Install MMCV + run: | + pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cpu/${{matrix.torch_version}}/index.html + python -c 'import mmcv; print(mmcv.__version__)' + - name: Install unittest dependencies + run: | + pip install -r requirements.txt + - name: Build and install + run: rm -rf .eggs && pip install -e . 
+ - name: Run unittests and generate coverage report + run: | + pip install timm + coverage run --branch --source mmseg -m pytest tests/ + coverage xml + coverage report -m + if: ${{matrix.torch >= '1.5.0'}} + - name: Skip timm unittests and generate coverage report + run: | + coverage run --branch --source mmseg -m pytest tests/ --ignore tests/test_models/test_backbones/test_timm_backbone.py + coverage xml + coverage report -m + if: ${{matrix.torch < '1.5.0'}} + + build_cuda101: + runs-on: ubuntu-18.04 + container: + image: pytorch/pytorch:1.6.0-cuda10.1-cudnn7-devel + + strategy: + matrix: + python-version: [3.7] + torch: + [ + 1.5.1+cu101, + 1.6.0+cu101, + 1.7.0+cu101, + 1.8.0+cu101 + ] + include: + - torch: 1.5.1+cu101 + torch_version: torch1.5 + torchvision: 0.6.1+cu101 + - torch: 1.6.0+cu101 + torch_version: torch1.6 + torchvision: 0.7.0+cu101 + - torch: 1.7.0+cu101 + torch_version: torch1.7 + torchvision: 0.8.1+cu101 + - torch: 1.8.0+cu101 + torch_version: torch1.8 + torchvision: 0.9.0+cu101 + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Fetch GPG keys + run: | + apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub + apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub + - name: Install system dependencies + run: | + apt-get update && apt-get install -y libgl1-mesa-glx ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 python${{matrix.python-version}}-dev + apt-get clean + rm -rf /var/lib/apt/lists/* + - name: Install Pillow + run: python -m pip install Pillow==6.2.2 + if: ${{matrix.torchvision < 0.5}} + - name: Install PyTorch + run: python -m pip install torch==${{matrix.torch}} torchvision==${{matrix.torchvision}} -f https://download.pytorch.org/whl/torch_stable.html + - name: Install mmseg dependencies + run: | + python -V + python -m pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu101/${{matrix.torch_version}}/index.html + python -m pip install -r requirements.txt + python -c 'import mmcv; print(mmcv.__version__)' + - name: Build and install + run: | + rm -rf .eggs + python setup.py check -m -s + TORCH_CUDA_ARCH_LIST=7.0 pip install . 
+ - name: Run unittests and generate coverage report + run: | + python -m pip install timm + coverage run --branch --source mmseg -m pytest tests/ + coverage xml + coverage report -m + if: ${{matrix.torch >= '1.5.0'}} + - name: Skip timm unittests and generate coverage report + run: | + coverage run --branch --source mmseg -m pytest tests/ --ignore tests/test_models/test_backbones/test_timm_backbone.py + coverage xml + coverage report -m + if: ${{matrix.torch < '1.5.0'}} + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v1.0.10 + with: + file: ./coverage.xml + flags: unittests + env_vars: OS,PYTHON + name: codecov-umbrella + fail_ci_if_error: false + + build_cuda102: + runs-on: ubuntu-18.04 + container: + image: pytorch/pytorch:1.9.0-cuda10.2-cudnn7-devel + + strategy: + matrix: + python-version: [3.6, 3.7, 3.8, 3.9] + torch: [1.9.0+cu102] + include: + - torch: 1.9.0+cu102 + torch_version: torch1.9 + torchvision: 0.10.0+cu102 + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Fetch GPG keys + run: | + apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub + apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub + - name: Install system dependencies + run: | + apt-get update && apt-get install -y libgl1-mesa-glx ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 + apt-get clean + rm -rf /var/lib/apt/lists/* + - name: Install Pillow + run: python -m pip install Pillow==6.2.2 + if: ${{matrix.torchvision < 0.5}} + - name: Install PyTorch + run: python -m pip install torch==${{matrix.torch}} torchvision==${{matrix.torchvision}} -f https://download.pytorch.org/whl/torch_stable.html + - name: Install mmseg dependencies + run: | + python -V + python -m pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu102/${{matrix.torch_version}}/index.html + python -m pip install -r requirements.txt + python -c 'import mmcv; print(mmcv.__version__)' + - name: Build and install + run: | + rm -rf .eggs + python setup.py check -m -s + TORCH_CUDA_ARCH_LIST=7.0 pip install . 
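+      # NOTE: TORCH_CUDA_ARCH_LIST=7.0 above limits CUDA kernel compilation
+      # to a single compute capability, which keeps the CI build time down.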
+ - name: Run unittests and generate coverage report + run: | + python -m pip install timm + coverage run --branch --source mmseg -m pytest tests/ + coverage xml + coverage report -m + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v2 + with: + files: ./coverage.xml + flags: unittests + env_vars: OS,PYTHON + name: codecov-umbrella + fail_ci_if_error: false + + test_windows: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [windows-2022] + python: [3.8] + platform: [cpu, cu111] + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python }} + - name: Upgrade pip + run: python -m pip install pip --upgrade --user + - name: Install OpenCV + run: pip install opencv-python>=3 + - name: Install PyTorch + # As a complement to Linux CI, we test on PyTorch LTS version + run: pip install torch==1.8.2+${{ matrix.platform }} torchvision==0.9.2+${{ matrix.platform }} -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html + - name: Install MMCV + run: | + pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.8/index.html --only-binary mmcv-full + - name: Install unittest dependencies + run: pip install -r requirements/tests.txt -r requirements/optional.txt + - name: Build and install + run: pip install -e . + - name: Run unittests + run: | + python -m pip install timm + coverage run --branch --source mmseg -m pytest tests/ + - name: Generate coverage report + run: | + coverage xml + coverage report -m diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/.github/workflows/deploy.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/.github/workflows/deploy.yml new file mode 100644 index 0000000..ab64085 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/.github/workflows/deploy.yml @@ -0,0 +1,26 @@ +name: deploy + +on: push + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + build-n-publish: + runs-on: ubuntu-latest + if: startsWith(github.event.ref, 'refs/tags') + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.7 + uses: actions/setup-python@v2 + with: + python-version: 3.7 + - name: Build MMSegmentation + run: | + pip install wheel + python setup.py sdist bdist_wheel + - name: Publish distribution to PyPI + run: | + pip install twine + twine upload dist/* -u __token__ -p ${{ secrets.pypi_password }} diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/.github/workflows/lint.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/.github/workflows/lint.yml new file mode 100644 index 0000000..a4d7baf --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/.github/workflows/lint.yml @@ -0,0 +1,28 @@ +name: lint + +on: [push, pull_request] + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + lint: + runs-on: ubuntu-18.04 + steps: + - uses: actions/checkout@v2 + - name: Set up Python 3.7 + uses: actions/setup-python@v2 + with: + python-version: 3.7 + - name: Install pre-commit hook + run: | + pip install pre-commit + pre-commit install + - name: Linting + run: | + pre-commit run --all-files + - name: Check docstring coverage + run: | + pip install interrogate + interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --exclude mmseg/ops --ignore-regex "__repr__" --fail-under 80 mmseg diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/.github/workflows/test_mim.yml 
b/prediction/image/mx15hdi/Detect/mmsegmentation/.github/workflows/test_mim.yml new file mode 100644 index 0000000..592043e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/.github/workflows/test_mim.yml @@ -0,0 +1,44 @@ +name: test-mim + +on: + push: + paths: + - 'model-index.yml' + - 'configs/**' + + pull_request: + paths: + - 'model-index.yml' + - 'configs/**' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + build_cpu: + runs-on: ubuntu-18.04 + strategy: + matrix: + python-version: [3.7] + torch: [1.8.0] + include: + - torch: 1.8.0 + torch_version: torch1.8 + torchvision: 0.9.0 + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Upgrade pip + run: pip install pip --upgrade + - name: Install PyTorch + run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html + - name: Install openmim + run: pip install openmim + - name: Build and install + run: rm -rf .eggs && mim install -e . + - name: test commands of mim + run: mim search mmsegmentation diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/.gitignore b/prediction/image/mx15hdi/Detect/mmsegmentation/.gitignore new file mode 100644 index 0000000..787d13e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/.gitignore @@ -0,0 +1,120 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/en/_build/ +docs/zh_cn/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +.DS_Store + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +data +.vscode +.idea + +# custom +*.pkl +*.pkl.json +*.log.json +work_dirs/ +mmseg/.mim + +# Pytorch +*.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/.owners.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/.owners.yml new file mode 100644 index 0000000..b0f177f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/.owners.yml @@ -0,0 +1,11 @@ +assign: + strategy: + # random + # round-robin + daily-shift-based + assignees: + - MengzhangLI + - xiexinch + - MeowZheng + - MengzhangLI + - xiexinch diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/.pre-commit-config.yaml b/prediction/image/mx15hdi/Detect/mmsegmentation/.pre-commit-config.yaml new file mode 100644 index 0000000..884f5cd --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/.pre-commit-config.yaml @@ -0,0 +1,60 @@ +repos: + - repo: https://gitlab.com/pycqa/flake8.git + rev: 3.8.3 + hooks: + - id: flake8 + - repo: https://github.com/PyCQA/isort + rev: 5.10.1 + hooks: + - id: isort + - repo: https://github.com/pre-commit/mirrors-yapf + rev: v0.30.0 + hooks: + - id: yapf + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.1.0 + hooks: + - id: trailing-whitespace + - id: check-yaml + - id: end-of-file-fixer + - id: requirements-txt-fixer + - id: double-quote-string-fixer + - id: check-merge-conflict + - id: fix-encoding-pragma + args: ["--remove"] + - id: mixed-line-ending + args: ["--fix=lf"] + - repo: https://github.com/executablebooks/mdformat + rev: 0.7.9 + hooks: + - id: mdformat + args: ["--number"] + additional_dependencies: + - mdformat-openmmlab + - mdformat_frontmatter + - linkify-it-py + - repo: https://github.com/codespell-project/codespell + rev: v2.1.0 + hooks: + - id: codespell + - repo: https://github.com/myint/docformatter + rev: v1.3.1 + hooks: + - id: docformatter + args: ["--in-place", "--wrap-descriptions", "79"] + - repo: local + hooks: + - id: update-model-index + name: update-model-index + description: Collect model information and update model-index.yml + entry: .dev/md2yml.py + additional_dependencies: [mmcv, lxml, opencv-python] + language: python + files: ^configs/.*\.md$ + require_serial: true + - repo: https://github.com/open-mmlab/pre-commit-hooks + rev: v0.2.0 # Use the rev to fix revision + hooks: + - id: check-algo-readme + - id: check-copyright + args: ["mmseg", "tools", "tests", "demo"] # the dir_to_check with expected directory to check diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/.readthedocs.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/.readthedocs.yml new file mode 100644 index 0000000..6cfbf5d --- /dev/null +++ 
b/prediction/image/mx15hdi/Detect/mmsegmentation/.readthedocs.yml @@ -0,0 +1,9 @@ +version: 2 + +formats: all + +python: + version: 3.7 + install: + - requirements: requirements/docs.txt + - requirements: requirements/readthedocs.txt diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/CITATION.cff b/prediction/image/mx15hdi/Detect/mmsegmentation/CITATION.cff new file mode 100644 index 0000000..cfd7cab --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/CITATION.cff @@ -0,0 +1,8 @@ +cff-version: 1.2.0 +message: "If you use this software, please cite it as below." +authors: + - name: "MMSegmentation Contributors" +title: "OpenMMLab Semantic Segmentation Toolbox and Benchmark" +date-released: 2020-07-10 +url: "https://github.com/open-mmlab/mmsegmentation" +license: Apache-2.0 diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/LICENSE b/prediction/image/mx15hdi/Detect/mmsegmentation/LICENSE new file mode 100644 index 0000000..38e625b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/LICENSE @@ -0,0 +1,203 @@ +Copyright 2020 The MMSegmentation Authors. All rights reserved. + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!) The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2020 The MMSegmentation Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/LICENSES.md b/prediction/image/mx15hdi/Detect/mmsegmentation/LICENSES.md
new file mode 100644
index 0000000..790d81e
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/LICENSES.md
@@ -0,0 +1,7 @@
+# Licenses for special features
+
+In this file, we list the features that are covered by licenses other than Apache 2.0. Users should be careful when adopting these features in commercial products.
+ +| Feature | Files | License | +| :-------: | :-------------------------------------------------------------------------------------------------------------------------------------------------: | :-----------------------------------------------------------: | +| SegFormer | [mmseg/models/decode_heads/segformer_head.py](https://github.com/open-mmlab/mmsegmentation/blob/master/mmseg/models/decode_heads/segformer_head.py) | [NVIDIA License](https://github.com/NVlabs/SegFormer#license) | diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/MANIFEST.in b/prediction/image/mx15hdi/Detect/mmsegmentation/MANIFEST.in new file mode 100644 index 0000000..e307d81 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/MANIFEST.in @@ -0,0 +1,4 @@ +include requirements/*.txt +include mmseg/.mim/model-index.yml +recursive-include mmseg/.mim/configs *.py *.yml +recursive-include mmseg/.mim/tools *.py *.sh diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/README.md new file mode 100644 index 0000000..2d5027e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/README.md @@ -0,0 +1,229 @@ +
+
+OpenMMLab website (HOT) | OpenMMLab platform (TRY IT OUT)
+
+ +[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/mmsegmentation)](https://pypi.org/project/mmsegmentation/) +[![PyPI](https://img.shields.io/pypi/v/mmsegmentation)](https://pypi.org/project/mmsegmentation) +[![docs](https://img.shields.io/badge/docs-latest-blue)](https://mmsegmentation.readthedocs.io/en/latest/) +[![badge](https://github.com/open-mmlab/mmsegmentation/workflows/build/badge.svg)](https://github.com/open-mmlab/mmsegmentation/actions) +[![codecov](https://codecov.io/gh/open-mmlab/mmsegmentation/branch/master/graph/badge.svg)](https://codecov.io/gh/open-mmlab/mmsegmentation) +[![license](https://img.shields.io/github/license/open-mmlab/mmsegmentation.svg)](https://github.com/open-mmlab/mmsegmentation/blob/master/LICENSE) +[![issue resolution](https://isitmaintained.com/badge/resolution/open-mmlab/mmsegmentation.svg)](https://github.com/open-mmlab/mmsegmentation/issues) +[![open issues](https://isitmaintained.com/badge/open/open-mmlab/mmsegmentation.svg)](https://github.com/open-mmlab/mmsegmentation/issues) + +[📘Documentation](https://mmsegmentation.readthedocs.io/en/latest/) | +[🛠️Installation](https://mmsegmentation.readthedocs.io/en/latest/get_started.html) | +[👀Model Zoo](https://mmsegmentation.readthedocs.io/en/latest/model_zoo.html) | +[🆕Update News](https://mmsegmentation.readthedocs.io/en/latest/changelog.html) | +[🤔Reporting Issues](https://github.com/open-mmlab/mmsegmentation/issues/new/choose) + +
+ +
+ +English | [简体中文](README_zh-CN.md) + +
+ +## Introduction + +MMSegmentation is an open source semantic segmentation toolbox based on PyTorch. +It is a part of the [OpenMMLab](https://openmmlab.com/) project. + +The master branch works with **PyTorch 1.5+**. + +![demo image](resources/seg_demo.gif) + +
+
+**Major features**
+
+- **Unified Benchmark**
+
+  We provide a unified benchmark toolbox for various semantic segmentation methods.
+
+- **Modular Design**
+
+  We decompose the semantic segmentation framework into different components, and one can easily construct a customized semantic segmentation framework by combining different modules.
+
+- **Support of multiple methods out of the box**
+
+  The toolbox directly supports popular and contemporary semantic segmentation frameworks, *e.g.* PSPNet, DeepLabV3, PSANet, DeepLabV3+, etc.
+
+- **High efficiency**
+
+  The training speed is faster than or comparable to other codebases.
+
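+As a concrete illustration of the modular design, the sketch below assembles a model from one of the configs vendored later in this diff. It is a minimal sketch only: it assumes `mmcv-full` and this `mmseg` checkout are installed, and the config path is illustrative; any file under `configs/` composes the same way.
+
+```python
+# Minimal sketch: build a segmentor from a declarative config.
+from mmcv import Config
+from mmseg.models import build_segmentor
+
+# Configs are plain Python files; `_base_` entries (datasets, models,
+# schedules) are merged recursively before the model is built.
+cfg = Config.fromfile('configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py')
+
+# Every component ('type' key: backbone, decode head, loss) is resolved
+# through a registry, so swapping modules is a config-only change.
+model = build_segmentor(
+    cfg.model,
+    train_cfg=cfg.get('train_cfg'),
+    test_cfg=cfg.get('test_cfg'))
+print(type(model).__name__)  # EncoderDecoder
+```
+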
+
+## What's New
+
+v0.25.0 was released on June 2, 2022:
+
+- Support PyTorch backend on MLU
+
+Please refer to [changelog.md](docs/en/changelog.md) for details and release history.
+
+## Installation
+
+Please refer to [get_started.md](docs/en/get_started.md#installation) for installation and [dataset_prepare.md](docs/en/dataset_prepare.md#prepare-datasets) for dataset preparation.
+
+## Get Started
+
+Please see [train.md](docs/en/train.md) and [inference.md](docs/en/inference.md) for the basic usage of MMSegmentation.
+There are also tutorials for:
+
+- [customizing dataset](docs/en/tutorials/customize_datasets.md)
+- [designing data pipeline](docs/en/tutorials/data_pipeline.md)
+- [customizing modules](docs/en/tutorials/customize_models.md)
+- [customizing runtime](docs/en/tutorials/customize_runtime.md)
+- [training tricks](docs/en/tutorials/training_tricks.md)
+- [useful tools](docs/en/useful_tools.md)
+
+A Colab tutorial is also provided. You may preview the notebook [here](demo/MMSegmentation_Tutorial.ipynb) or directly [run](https://colab.research.google.com/github/open-mmlab/mmsegmentation/blob/master/demo/MMSegmentation_Tutorial.ipynb) on Colab.
+
+## Benchmark and model zoo
+
+Results and models are available in the [model zoo](docs/en/model_zoo.md).
+
+Supported backbones:
+
+- [x] ResNet (CVPR'2016)
+- [x] ResNeXt (CVPR'2017)
+- [x] [HRNet (CVPR'2019)](configs/hrnet)
+- [x] [ResNeSt (ArXiv'2020)](configs/resnest)
+- [x] [MobileNetV2 (CVPR'2018)](configs/mobilenet_v2)
+- [x] [MobileNetV3 (ICCV'2019)](configs/mobilenet_v3)
+- [x] [Vision Transformer (ICLR'2021)](configs/vit)
+- [x] [Swin Transformer (ICCV'2021)](configs/swin)
+- [x] [Twins (NeurIPS'2021)](configs/twins)
+- [x] [BEiT (ICLR'2022)](configs/beit)
+- [x] [ConvNeXt (CVPR'2022)](configs/convnext)
+- [x] [MAE (CVPR'2022)](configs/mae)
+
+Supported methods:
+
+- [x] [FCN (CVPR'2015/TPAMI'2017)](configs/fcn)
+- [x] [ERFNet (T-ITS'2017)](configs/erfnet)
+- [x] [UNet (MICCAI'2016/Nat.
Methods'2019)](configs/unet) +- [x] [PSPNet (CVPR'2017)](configs/pspnet) +- [x] [DeepLabV3 (ArXiv'2017)](configs/deeplabv3) +- [x] [BiSeNetV1 (ECCV'2018)](configs/bisenetv1) +- [x] [PSANet (ECCV'2018)](configs/psanet) +- [x] [DeepLabV3+ (CVPR'2018)](configs/deeplabv3plus) +- [x] [UPerNet (ECCV'2018)](configs/upernet) +- [x] [ICNet (ECCV'2018)](configs/icnet) +- [x] [NonLocal Net (CVPR'2018)](configs/nonlocal_net) +- [x] [EncNet (CVPR'2018)](configs/encnet) +- [x] [Semantic FPN (CVPR'2019)](configs/sem_fpn) +- [x] [DANet (CVPR'2019)](configs/danet) +- [x] [APCNet (CVPR'2019)](configs/apcnet) +- [x] [EMANet (ICCV'2019)](configs/emanet) +- [x] [CCNet (ICCV'2019)](configs/ccnet) +- [x] [DMNet (ICCV'2019)](configs/dmnet) +- [x] [ANN (ICCV'2019)](configs/ann) +- [x] [GCNet (ICCVW'2019/TPAMI'2020)](configs/gcnet) +- [x] [FastFCN (ArXiv'2019)](configs/fastfcn) +- [x] [Fast-SCNN (ArXiv'2019)](configs/fastscnn) +- [x] [ISANet (ArXiv'2019/IJCV'2021)](configs/isanet) +- [x] [OCRNet (ECCV'2020)](configs/ocrnet) +- [x] [DNLNet (ECCV'2020)](configs/dnlnet) +- [x] [PointRend (CVPR'2020)](configs/point_rend) +- [x] [CGNet (TIP'2020)](configs/cgnet) +- [x] [BiSeNetV2 (IJCV'2021)](configs/bisenetv2) +- [x] [STDC (CVPR'2021)](configs/stdc) +- [x] [SETR (CVPR'2021)](configs/setr) +- [x] [DPT (ArXiv'2021)](configs/dpt) +- [x] [Segmenter (ICCV'2021)](configs/segmenter) +- [x] [SegFormer (NeurIPS'2021)](configs/segformer) +- [x] [K-Net (NeurIPS'2021)](configs/knet) + +Supported datasets: + +- [x] [Cityscapes](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#cityscapes) +- [x] [PASCAL VOC](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#pascal-voc) +- [x] [ADE20K](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#ade20k) +- [x] [Pascal Context](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#pascal-context) +- [x] [COCO-Stuff 10k](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#coco-stuff-10k) +- [x] [COCO-Stuff 164k](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#coco-stuff-164k) +- [x] [CHASE_DB1](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#chase-db1) +- [x] [DRIVE](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#drive) +- [x] [HRF](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#hrf) +- [x] [STARE](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#stare) +- [x] [Dark Zurich](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#dark-zurich) +- [x] [Nighttime Driving](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#nighttime-driving) +- [x] [LoveDA](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#loveda) +- [x] [Potsdam](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#isprs-potsdam) +- [x] [Vaihingen](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#isprs-vaihingen) +- [x] [iSAID](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/dataset_prepare.md#isaid) + +## FAQ + +Please refer to [FAQ](docs/en/faq.md) for frequently asked questions. + +## Contributing + +We appreciate all contributions to improve MMSegmentation. 
Please refer to [CONTRIBUTING.md](.github/CONTRIBUTING.md) for the contribution guidelines.
+
+## Acknowledgement
+
+MMSegmentation is an open source project that welcomes contributions and feedback.
+We hope the toolbox and benchmark serve the growing research community by
+providing a flexible, standardized toolkit for reimplementing existing methods
+and developing new semantic segmentation methods.
+
+## Citation
+
+If you find this project useful in your research, please consider citing:
+
+```bibtex
+@misc{mmseg2020,
+  title={{MMSegmentation}: OpenMMLab Semantic Segmentation Toolbox and Benchmark},
+  author={MMSegmentation Contributors},
+  howpublished = {\url{https://github.com/open-mmlab/mmsegmentation}},
+  year={2020}
+}
+```
+
+## License
+
+MMSegmentation is released under the Apache 2.0 license, while some specific features in this library are covered by other licenses. Please check [LICENSES.md](LICENSES.md) carefully if you are using our code for commercial purposes.
+
+## Projects in OpenMMLab
+
+- [MMCV](https://github.com/open-mmlab/mmcv): OpenMMLab foundational library for computer vision.
+- [MIM](https://github.com/open-mmlab/mim): MIM installs OpenMMLab packages.
+- [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab image classification toolbox and benchmark.
+- [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab detection toolbox and benchmark.
+- [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab's next-generation platform for general 3D object detection.
+- [MMRotate](https://github.com/open-mmlab/mmrotate): OpenMMLab rotated object detection toolbox and benchmark.
+- [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab semantic segmentation toolbox and benchmark.
+- [MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab text detection, recognition, and understanding toolbox.
+- [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab pose estimation toolbox and benchmark.
+- [MMHuman3D](https://github.com/open-mmlab/mmhuman3d): OpenMMLab 3D human parametric model toolbox and benchmark.
+- [MMSelfSup](https://github.com/open-mmlab/mmselfsup): OpenMMLab self-supervised learning toolbox and benchmark.
+- [MMRazor](https://github.com/open-mmlab/mmrazor): OpenMMLab model compression toolbox and benchmark.
+- [MMFewShot](https://github.com/open-mmlab/mmfewshot): OpenMMLab few-shot learning toolbox and benchmark.
+- [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab's next-generation action understanding toolbox and benchmark.
+- [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab video perception toolbox and benchmark.
+- [MMFlow](https://github.com/open-mmlab/mmflow): OpenMMLab optical flow toolbox and benchmark.
+- [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab image and video editing toolbox.
+- [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab image and video generative models toolbox.
+- [MMDeploy](https://github.com/open-mmlab/mmdeploy): OpenMMLab model deployment framework.

diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/README_zh-CN.md b/prediction/image/mx15hdi/Detect/mmsegmentation/README_zh-CN.md
new file mode 100644
index 0000000..f6f7d4c
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/README_zh-CN.md
@@ -0,0 +1,242 @@
+
+OpenMMLab 官网 (HOT) | OpenMMLab 开放平台 (TRY IT OUT)
+
+ +[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/mmsegmentation)](https://pypi.org/project/mmsegmentation/) +[![PyPI](https://img.shields.io/pypi/v/mmsegmentation)](https://pypi.org/project/mmsegmentation) +[![docs](https://img.shields.io/badge/docs-latest-blue)](https://mmsegmentation.readthedocs.io/zh_CN/latest/) +[![badge](https://github.com/open-mmlab/mmsegmentation/workflows/build/badge.svg)](https://github.com/open-mmlab/mmsegmentation/actions) +[![codecov](https://codecov.io/gh/open-mmlab/mmsegmentation/branch/master/graph/badge.svg)](https://codecov.io/gh/open-mmlab/mmsegmentation) +[![license](https://img.shields.io/github/license/open-mmlab/mmsegmentation.svg)](https://github.com/open-mmlab/mmsegmentation/blob/master/LICENSE) +[![issue resolution](https://isitmaintained.com/badge/resolution/open-mmlab/mmsegmentation.svg)](https://github.com/open-mmlab/mmsegmentation/issues) +[![open issues](https://isitmaintained.com/badge/open/open-mmlab/mmsegmentation.svg)](https://github.com/open-mmlab/mmsegmentation/issues) + +[📘使用文档](https://mmsegmentation.readthedocs.io/en/latest/) | +[🛠️安装指南](https://mmsegmentation.readthedocs.io/en/latest/get_started.html) | +[👀模型库](https://mmsegmentation.readthedocs.io/en/latest/model_zoo.html) | +[🆕更新日志](https://mmsegmentation.readthedocs.io/en/latest/changelog.html) | +[🤔报告问题](https://github.com/open-mmlab/mmsegmentation/issues/new/choose) + +[English](README.md) | 简体中文 + +
+ +## 简介 + +MMSegmentation 是一个基于 PyTorch 的语义分割开源工具箱。它是 OpenMMLab 项目的一部分。 + +主分支代码目前支持 PyTorch 1.5 以上的版本。 + +![示例图片](resources/seg_demo.gif) + +
+
+### 主要特性
+
+- **统一的基准平台**
+
+  我们将各种各样的语义分割算法集成到了一个统一的工具箱,进行基准测试。
+
+- **模块化设计**
+
+  MMSegmentation 将分割框架解耦成不同的模块组件,通过组合不同的模块组件,用户可以便捷地构建自定义的分割模型。
+
+- **丰富的即插即用的算法和模型**
+
+  MMSegmentation 支持了众多主流的和最新的分割算法,例如 PSPNet,DeepLabV3,PSANet,DeepLabV3+ 等。
+
+- **速度快**
+
+  训练速度比其他语义分割代码库更快或者相当。
+
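+For readers of this diff, the sketch below exercises the high-level inference API that the 快速入门 section points to. It is a minimal sketch only: it assumes `mmcv-full` and `mmseg` are installed, the config path is illustrative, and the checkpoint is a hypothetical local download (weights are not shipped in this diff).
+
+```python
+# Minimal inference sketch; paths are illustrative, not shipped in this diff.
+from mmseg.apis import init_segmentor, inference_segmentor
+
+config_file = 'configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py'
+checkpoint_file = 'checkpoints/pspnet_r50-d8.pth'  # hypothetical local path
+
+# Build the model and load weights; pass device='cpu' when no GPU is available.
+model = init_segmentor(config_file, checkpoint_file, device='cuda:0')
+
+# Returns a list with one H x W array of per-pixel class indices.
+result = inference_segmentor(model, 'demo/demo.png')
+print(result[0].shape)
+```
+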
+ +## 最新进展 + +最新版本 v0.25.0 在 2022.6.2 发布: + +- 支持 PyTorch MLU 后端 + +如果想了解更多版本更新细节和历史信息,请阅读[更新日志](docs/en/changelog.md)。 + +## 安装 + +请参考[快速入门文档](docs/zh_cn/get_started.md#installation)进行安装,参考[数据集准备](docs/zh_cn/dataset_prepare.md)处理数据。 + +## 快速入门 + +请参考[训练教程](docs/zh_cn/train.md)和[测试教程](docs/zh_cn/inference.md)学习 MMSegmentation 的基本使用。 +我们也提供了一些进阶教程,内容覆盖了: + +- [增加自定义数据集](docs/zh_cn/tutorials/customize_datasets.md) +- [设计新的数据预处理流程](docs/zh_cn/tutorials/data_pipeline.md) +- [增加自定义模型](docs/zh_cn/tutorials/customize_models.md) +- [增加自定义的运行时配置](docs/zh_cn/tutorials/customize_runtime.md)。 +- [训练技巧说明](docs/zh_cn/tutorials/training_tricks.md) +- [有用的工具](docs/zh_cn/useful_tools.md)。 + +同时,我们提供了 Colab 教程。你可以在[这里](demo/MMSegmentation_Tutorial.ipynb)浏览教程,或者直接在 Colab 上[运行](https://colab.research.google.com/github/open-mmlab/mmsegmentation/blob/master/demo/MMSegmentation_Tutorial.ipynb)。 + +## 基准测试和模型库 + +测试结果和模型可以在[模型库](docs/zh_cn/model_zoo.md)中找到。 + +已支持的骨干网络: + +- [x] ResNet (CVPR'2016) +- [x] ResNeXt (CVPR'2017) +- [x] [HRNet (CVPR'2019)](configs/hrnet) +- [x] [ResNeSt (ArXiv'2020)](configs/resnest) +- [x] [MobileNetV2 (CVPR'2018)](configs/mobilenet_v2) +- [x] [MobileNetV3 (ICCV'2019)](configs/mobilenet_v3) +- [x] [Vision Transformer (ICLR'2021)](configs/vit) +- [x] [Swin Transformer (ICCV'2021)](configs/swin) +- [x] [Twins (NeurIPS'2021)](configs/twins) +- [x] [BEiT (ICLR'2022)](configs/beit) +- [x] [ConvNeXt (CVPR'2022)](configs/convnext) +- [x] [MAE (CVPR'2022)](configs/mae) + +已支持的算法: + +- [x] [FCN (CVPR'2015/TPAMI'2017)](configs/fcn) +- [x] [ERFNet (T-ITS'2017)](configs/erfnet) +- [x] [UNet (MICCAI'2016/Nat. Methods'2019)](configs/unet) +- [x] [PSPNet (CVPR'2017)](configs/pspnet) +- [x] [DeepLabV3 (ArXiv'2017)](configs/deeplabv3) +- [x] [BiSeNetV1 (ECCV'2018)](configs/bisenetv1) +- [x] [PSANet (ECCV'2018)](configs/psanet) +- [x] [DeepLabV3+ (CVPR'2018)](configs/deeplabv3plus) +- [x] [UPerNet (ECCV'2018)](configs/upernet) +- [x] [ICNet (ECCV'2018)](configs/icnet) +- [x] [NonLocal Net (CVPR'2018)](configs/nonlocal_net) +- [x] [EncNet (CVPR'2018)](configs/encnet) +- [x] [Semantic FPN (CVPR'2019)](configs/sem_fpn) +- [x] [DANet (CVPR'2019)](configs/danet) +- [x] [APCNet (CVPR'2019)](configs/apcnet) +- [x] [EMANet (ICCV'2019)](configs/emanet) +- [x] [CCNet (ICCV'2019)](configs/ccnet) +- [x] [DMNet (ICCV'2019)](configs/dmnet) +- [x] [ANN (ICCV'2019)](configs/ann) +- [x] [GCNet (ICCVW'2019/TPAMI'2020)](configs/gcnet) +- [x] [FastFCN (ArXiv'2019)](configs/fastfcn) +- [x] [Fast-SCNN (ArXiv'2019)](configs/fastscnn) +- [x] [ISANet (ArXiv'2019/IJCV'2021)](configs/isanet) +- [x] [OCRNet (ECCV'2020)](configs/ocrnet) +- [x] [DNLNet (ECCV'2020)](configs/dnlnet) +- [x] [PointRend (CVPR'2020)](configs/point_rend) +- [x] [CGNet (TIP'2020)](configs/cgnet) +- [x] [BiSeNetV2 (IJCV'2021)](configs/bisenetv2) +- [x] [STDC (CVPR'2021)](configs/stdc) +- [x] [SETR (CVPR'2021)](configs/setr) +- [x] [DPT (ArXiv'2021)](configs/dpt) +- [x] [Segmenter (ICCV'2021)](configs/segmenter) +- [x] [SegFormer (NeurIPS'2021)](configs/segformer) +- [x] [K-Net (NeurIPS'2021)](configs/knet) + +已支持的数据集: + +- [x] [Cityscapes](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/zh_cn/dataset_prepare.md#cityscapes) +- [x] [PASCAL VOC](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/zh_cn/dataset_prepare.md#pascal-voc) +- [x] [ADE20K](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/zh_cn/dataset_prepare.md#ade20k) +- [x] [Pascal 
Context](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/zh_cn/dataset_prepare.md#pascal-context) +- [x] [COCO-Stuff 10k](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/zh_cn/dataset_prepare.md#coco-stuff-10k) +- [x] [COCO-Stuff 164k](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/zh_cn/dataset_prepare.md#coco-stuff-164k) +- [x] [CHASE_DB1](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/zh_cn/dataset_prepare.md#chase-db1) +- [x] [DRIVE](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/zh_cn/dataset_prepare.md#drive) +- [x] [HRF](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/zh_cn/dataset_prepare.md#hrf) +- [x] [STARE](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/zh_cn/dataset_prepare.md#stare) +- [x] [Dark Zurich](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/zh_cn/dataset_prepare.md#dark-zurich) +- [x] [Nighttime Driving](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/zh_cn/dataset_prepare.md#nighttime-driving) +- [x] [LoveDA](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/zh_cn/dataset_prepare.md#loveda) +- [x] [Potsdam](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/zh_cn/dataset_prepare.md#isprs-potsdam) +- [x] [Vaihingen](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/zh_cn/dataset_prepare.md#isprs-vaihingen) +- [x] [iSAID](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/zh_cn/dataset_prepare.md#isaid) + +## 常见问题 + +如果遇到问题,请参考 [常见问题解答](docs/zh_cn/faq.md)。 + +## 贡献指南 + +我们感谢所有的贡献者为改进和提升 MMSegmentation 所作出的努力。请参考[贡献指南](.github/CONTRIBUTING.md)来了解参与项目贡献的相关指引。 + +## 致谢 + +MMSegmentation 是一个由来自不同高校和企业的研发人员共同参与贡献的开源项目。我们感谢所有为项目提供算法复现和新功能支持的贡献者,以及提供宝贵反馈的用户。 我们希望这个工具箱和基准测试可以为社区提供灵活的代码工具,供用户复现已有算法并开发自己的新模型,从而不断为开源社区提供贡献。 + +## 引用 + +如果你觉得本项目对你的研究工作有所帮助,请参考如下 bibtex 引用 MMSegmentation。 + +```bibtex +@misc{mmseg2020, + title={{MMSegmentation}: OpenMMLab Semantic Segmentation Toolbox and Benchmark}, + author={MMSegmentation Contributors}, + howpublished = {\url{https://github.com/open-mmlab/mmsegmentation}}, + year={2020} +} +``` + +## 开源许可证 + +`MMSegmentation` 目前以 Apache 2.0 的许可证发布,但是其中有一部分功能并不是使用的 Apache2.0 许可证,我们在 [许可证](LICENSES.md) 中详细地列出了这些功能以及他们对应的许可证,如果您正在从事盈利性活动,请谨慎参考此文档。 + +## OpenMMLab 的其他项目 + +- [MMCV](https://github.com/open-mmlab/mmcv): OpenMMLab 计算机视觉基础库 +- [MIM](https://github.com/open-mmlab/mim): MIM 是 OpenMMlab 项目、算法、模型的统一入口 +- [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab 图像分类工具箱 +- [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab 目标检测工具箱 +- [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab 新一代通用 3D 目标检测平台 +- [MMRotate](https://github.com/open-mmlab/mmrotate): OpenMMLab 旋转框检测工具箱与测试基准 +- [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab 语义分割工具箱 +- [MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab 全流程文字检测识别理解工具包 +- [MMPose](https://github.com/open-mmlab/mmpose): OpenMMLab 姿态估计工具箱 +- [MMHuman3D](https://github.com/open-mmlab/mmhuman3d): OpenMMLab 人体参数化模型工具箱与测试基准 +- [MMSelfSup](https://github.com/open-mmlab/mmselfsup): OpenMMLab 自监督学习工具箱与测试基准 +- [MMRazor](https://github.com/open-mmlab/mmrazor): OpenMMLab 模型压缩工具箱与测试基准 +- [MMFewShot](https://github.com/open-mmlab/mmfewshot): OpenMMLab 少样本学习工具箱与测试基准 +- [MMAction2](https://github.com/open-mmlab/mmaction2): OpenMMLab 新一代视频理解工具箱 +- [MMTracking](https://github.com/open-mmlab/mmtracking): OpenMMLab 一体化视频目标感知平台 +- 
[MMFlow](https://github.com/open-mmlab/mmflow): OpenMMLab 光流估计工具箱与测试基准 +- [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab 图像视频编辑工具箱 +- [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab 图片视频生成模型工具箱 +- [MMDeploy](https://github.com/open-mmlab/mmdeploy): OpenMMLab 模型部署框架 + +## 欢迎加入 OpenMMLab 社区 + +扫描下方的二维码可关注 OpenMMLab 团队的 [知乎官方账号](https://www.zhihu.com/people/openmmlab),加入 [OpenMMLab 团队](https://jq.qq.com/?_wv=1027&k=aCvMxdr3) 以及 [MMSegmentation](https://jq.qq.com/?_wv=1027&k=9sprS2YO) 的 QQ 群。 + +
+ +
+ +我们会在 OpenMMLab 社区为大家 + +- 📢 分享 AI 框架的前沿核心技术 +- 💻 解读 PyTorch 常用模块源码 +- 📰 发布 OpenMMLab 的相关新闻 +- 🚀 介绍 OpenMMLab 开发的前沿算法 +- 🏃 获取更高效的问题答疑和意见反馈 +- 🔥 提供与各行各业开发者充分交流的平台 + +干货满满 📘,等你来撩 💗,OpenMMLab 社区期待您的加入 👬 diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/CustomDataset.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/CustomDataset.py new file mode 100644 index 0000000..24d8b43 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/CustomDataset.py @@ -0,0 +1,53 @@ +# dataset settings +dataset_type = 'CustomDataset' # need to change +data_root = 'data/my_dataset_v7' # need to change +img_norm_cfg = dict( + mean=[127.93135507, 116.76565979, 103.67335042], std=[49.55883976, 47.7692082, 50.7934459], to_rgb=True) # need to calculate +crop_size = (512, 512) # need to change +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), # need to change + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, # need to change + workers_per_gpu=1, # need to change + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='img_dir/train', # need to change + ann_dir='ann_dir/train', # need to change + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='img_dir/val',# need to change + ann_dir='ann_dir/val',# need to change + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='img_dir/test',# need to change + ann_dir='ann_dir/test',# need to change + pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/ade20k.py new file mode 100644 index 0000000..efc8b4b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/ade20k.py @@ -0,0 +1,54 @@ +# dataset settings +dataset_type = 'ADE20KDataset' +data_root = 'data/ade/ADEChallengeData2016' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 512), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 
1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/ade20k_640x640.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/ade20k_640x640.py new file mode 100644 index 0000000..14a4bb0 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/ade20k_640x640.py @@ -0,0 +1,54 @@ +# dataset settings +dataset_type = 'ADE20KDataset' +data_root = 'data/ade/ADEChallengeData2016' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (640, 640) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='Resize', img_scale=(2560, 640), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2560, 640), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/chase_db1.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/chase_db1.py new file mode 100644 index 0000000..298594e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/chase_db1.py @@ -0,0 +1,59 @@ +# dataset settings +dataset_type = 'ChaseDB1Dataset' +data_root = 'data/CHASE_DB1' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (960, 999) +crop_size = (128, 128) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + 
dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/cityscapes.py new file mode 100644 index 0000000..f21867c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/cityscapes.py @@ -0,0 +1,54 @@ +# dataset settings +dataset_type = 'CityscapesDataset' +data_root = 'data/cityscapes/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 1024) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/train', + ann_dir='gtFine/train', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/cityscapes_1024x1024.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/cityscapes_1024x1024.py new file mode 100644 index 0000000..f98d929 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/cityscapes_1024x1024.py @@ -0,0 +1,35 @@ +_base_ = 
'./cityscapes.py' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (1024, 1024) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/cityscapes_768x768.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/cityscapes_768x768.py new file mode 100644 index 0000000..fde9d7c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/cityscapes_768x768.py @@ -0,0 +1,35 @@ +_base_ = './cityscapes.py' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (768, 768) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2049, 1025), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/cityscapes_769x769.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/cityscapes_769x769.py new file mode 100644 index 0000000..336c7b2 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/cityscapes_769x769.py @@ -0,0 +1,35 @@ +_base_ = './cityscapes.py' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (769, 769) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2049, 1025), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + 
dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2049, 1025), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/cityscapes_832x832.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/cityscapes_832x832.py new file mode 100644 index 0000000..b9325cc --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/cityscapes_832x832.py @@ -0,0 +1,35 @@ +_base_ = './cityscapes.py' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (832, 832) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/coco-stuff10k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/coco-stuff10k.py new file mode 100644 index 0000000..ec04969 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/coco-stuff10k.py @@ -0,0 +1,57 @@ +# dataset settings +dataset_type = 'COCOStuffDataset' +data_root = 'data/coco_stuff10k' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 512), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ 
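+            # Single-scale evaluation: keep-ratio resize only; with flip=False above, RandomFlip never triggers.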
+ dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + reduce_zero_label=True, + img_dir='images/train2014', + ann_dir='annotations/train2014', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + reduce_zero_label=True, + img_dir='images/test2014', + ann_dir='annotations/test2014', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + reduce_zero_label=True, + img_dir='images/test2014', + ann_dir='annotations/test2014', + pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/coco-stuff164k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/coco-stuff164k.py new file mode 100644 index 0000000..a6a38f2 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/coco-stuff164k.py @@ -0,0 +1,54 @@ +# dataset settings +dataset_type = 'COCOStuffDataset' +data_root = 'data/coco_stuff164k' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 512), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/train2017', + ann_dir='annotations/train2017', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/val2017', + ann_dir='annotations/val2017', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/val2017', + ann_dir='annotations/val2017', + pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/drive.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/drive.py new file mode 100644 index 0000000..06e8ff6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/drive.py @@ -0,0 +1,59 @@ +# dataset settings +dataset_type = 'DRIVEDataset' +data_root = 'data/DRIVE' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (584, 565) +crop_size = (64, 64) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', 
prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/hrf.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/hrf.py new file mode 100644 index 0000000..242d790 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/hrf.py @@ -0,0 +1,59 @@ +# dataset settings +dataset_type = 'HRFDataset' +data_root = 'data/HRF' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (2336, 3504) +crop_size = (256, 256) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/isaid.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/isaid.py new file mode 100644 index 0000000..8e4c26a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/isaid.py @@ -0,0 +1,62 @@ +# 
dataset settings +dataset_type = 'iSAIDDataset' +data_root = 'data/iSAID' + +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +""" +This crop_size setting is followed by the implementation of +`PointFlow: Flowing Semantics Through Points for Aerial Image +Segmentation `_. +""" + +crop_size = (896, 896) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(896, 896), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(896, 896), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='img_dir/train', + ann_dir='ann_dir/train', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='img_dir/val', + ann_dir='ann_dir/val', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='img_dir/val', + ann_dir='ann_dir/val', + pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/loveda.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/loveda.py new file mode 100644 index 0000000..e553356 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/loveda.py @@ -0,0 +1,54 @@ +# dataset settings +dataset_type = 'LoveDADataset' +data_root = 'data/loveDA' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1024, 1024), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='img_dir/train', + ann_dir='ann_dir/train', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='img_dir/val', + ann_dir='ann_dir/val', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + 
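+        # Note: the val split is reused as the test split in this config.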
img_dir='img_dir/val', + ann_dir='ann_dir/val', + pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/pascal_context.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/pascal_context.py new file mode 100644 index 0000000..ff65bad --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/pascal_context.py @@ -0,0 +1,60 @@ +# dataset settings +dataset_type = 'PascalContextDataset' +data_root = 'data/VOCdevkit/VOC2010/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +img_scale = (520, 520) +crop_size = (480, 480) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/train.txt', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/pascal_context_59.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/pascal_context_59.py new file mode 100644 index 0000000..37585ab --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/pascal_context_59.py @@ -0,0 +1,60 @@ +# dataset settings +dataset_type = 'PascalContextDataset59' +data_root = 'data/VOCdevkit/VOC2010/' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +img_scale = (520, 520) +crop_size = (480, 480) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ 
+ dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/train.txt', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClassContext', + split='ImageSets/SegmentationContext/val.txt', + pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/pascal_voc12.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/pascal_voc12.py new file mode 100644 index 0000000..ba1d42d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/pascal_voc12.py @@ -0,0 +1,57 @@ +# dataset settings +dataset_type = 'PascalVOCDataset' +data_root = 'data/VOCdevkit/VOC2012' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 512), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClass', + split='ImageSets/Segmentation/train.txt', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClass', + split='ImageSets/Segmentation/val.txt', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='JPEGImages', + ann_dir='SegmentationClass', + split='ImageSets/Segmentation/val.txt', + pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/pascal_voc12_aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/pascal_voc12_aug.py new file mode 100644 index 0000000..3f23b67 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/pascal_voc12_aug.py @@ -0,0 +1,9 @@ +_base_ = './pascal_voc12.py' +# dataset settings +data = dict( + train=dict( + ann_dir=['SegmentationClass', 'SegmentationClassAug'], + split=[ + 'ImageSets/Segmentation/train.txt', + 'ImageSets/Segmentation/aug.txt' + ])) diff --git 
a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/potsdam.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/potsdam.py new file mode 100644 index 0000000..f74c4a5 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/potsdam.py @@ -0,0 +1,54 @@ +# dataset settings +dataset_type = 'PotsdamDataset' +data_root = 'data/potsdam' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='Resize', img_scale=(512, 512), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='img_dir/train', + ann_dir='ann_dir/train', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='img_dir/val', + ann_dir='ann_dir/val', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='img_dir/val', + ann_dir='ann_dir/val', + pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/stare.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/stare.py new file mode 100644 index 0000000..3f71b25 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/stare.py @@ -0,0 +1,59 @@ +# dataset settings +dataset_type = 'STAREDataset' +data_root = 'data/STARE' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +img_scale = (605, 700) +crop_size = (128, 128) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=img_scale, ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale, + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75, 2.0], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='RepeatDataset', + times=40000, + dataset=dict( + type=dataset_type, + data_root=data_root, + 
img_dir='images/training', + ann_dir='annotations/training', + pipeline=train_pipeline)), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='images/validation', + ann_dir='annotations/validation', + pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/vaihingen.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/vaihingen.py new file mode 100644 index 0000000..c0df282 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/datasets/vaihingen.py @@ -0,0 +1,54 @@ +# dataset settings +dataset_type = 'ISPRSDataset' +data_root = 'data/vaihingen' +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='Resize', img_scale=(512, 512), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='img_dir/train', + ann_dir='ann_dir/train', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='img_dir/val', + ann_dir='ann_dir/val', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='img_dir/val', + ann_dir='ann_dir/val', + pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/default_runtime.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/default_runtime.py new file mode 100644 index 0000000..4459daa --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/default_runtime.py @@ -0,0 +1,9 @@ +# yapf:enable +dist_params = dict(backend='nccl') +log_level = 'INFO' +#load_from = 'checkpoints/danet_r50-d8_512x512_80k_ade20k_20200615_015125-edb18e08.pth' +load_from = None +resume_from = None +#workflow = [('train', 1)] +workflow = [('train', 1), ('val', 1)] +cudnn_benchmark = True diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/ann_r50-d8.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/ann_r50-d8.py new file mode 100644 index 0000000..a2cb653 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/ann_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 
1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='ANNHead', + in_channels=[1024, 2048], + in_index=[2, 3], + channels=512, + project_channels=256, + query_scales=(1, ), + key_pool_scales=(1, 3, 6, 8), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/apcnet_r50-d8.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/apcnet_r50-d8.py new file mode 100644 index 0000000..c8f5316 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/apcnet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='APCHead', + in_channels=2048, + in_index=3, + channels=512, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/bisenetv1_r18-d32.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/bisenetv1_r18-d32.py new file mode 100644 index 0000000..4069864 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/bisenetv1_r18-d32.py @@ -0,0 +1,68 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='BiSeNetV1', + in_channels=3, + context_channels=(128, 256, 512), + spatial_channels=(64, 64, 64, 128), + out_indices=(0, 1, 2), + out_channels=256, + backbone_cfg=dict( + type='ResNet', + in_channels=3, + depth=18, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + norm_cfg=norm_cfg, + align_corners=False, + init_cfg=None), + decode_head=dict( + type='FCNHead', + in_channels=256, + in_index=0, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + 
auxiliary_head=[ + dict( + type='FCNHead', + in_channels=128, + channels=64, + num_convs=1, + num_classes=19, + in_index=1, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='FCNHead', + in_channels=128, + channels=64, + num_convs=1, + num_classes=19, + in_index=2, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/bisenetv2.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/bisenetv2.py new file mode 100644 index 0000000..f8fffee --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/bisenetv2.py @@ -0,0 +1,80 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='BiSeNetV2', + detail_channels=(64, 64, 128), + semantic_channels=(16, 32, 64, 128), + semantic_expansion_ratio=6, + bga_channels=128, + out_indices=(0, 1, 2, 3, 4), + init_cfg=None, + align_corners=False), + decode_head=dict( + type='FCNHead', + in_channels=128, + in_index=0, + channels=1024, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=[ + dict( + type='FCNHead', + in_channels=16, + channels=16, + num_convs=2, + num_classes=19, + in_index=1, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='FCNHead', + in_channels=32, + channels=64, + num_convs=2, + num_classes=19, + in_index=2, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='FCNHead', + in_channels=64, + channels=256, + num_convs=2, + num_classes=19, + in_index=3, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='FCNHead', + in_channels=128, + channels=1024, + num_convs=2, + num_classes=19, + in_index=4, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/ccnet_r50-d8.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/ccnet_r50-d8.py new file mode 100644 index 0000000..794148f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/ccnet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='CCHead', + 
in_channels=2048, + in_index=3, + channels=512, + recurrence=2, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/cgnet.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/cgnet.py new file mode 100644 index 0000000..eff8d94 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/cgnet.py @@ -0,0 +1,35 @@ +# model settings +norm_cfg = dict(type='SyncBN', eps=1e-03, requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='CGNet', + norm_cfg=norm_cfg, + in_channels=3, + num_channels=(32, 64, 128), + num_blocks=(3, 21), + dilations=(2, 4), + reductions=(8, 16)), + decode_head=dict( + type='FCNHead', + in_channels=256, + in_index=2, + channels=256, + num_convs=0, + concat_input=False, + dropout_ratio=0, + num_classes=19, + norm_cfg=norm_cfg, + loss_decode=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0, + class_weight=[ + 2.5959933, 6.7415504, 3.5354059, 9.8663225, 9.690899, 9.369352, + 10.289121, 9.953208, 4.3097677, 9.490387, 7.674431, 9.396905, + 10.347791, 6.3927646, 10.226669, 10.241062, 10.280587, + 10.396974, 10.055647 + ])), + # model training and testing settings + train_cfg=dict(sampler=None), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/danet_r50-d8.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/danet_r50-d8.py new file mode 100644 index 0000000..8753888 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/danet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + type='ResNetV1c', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DAHead', + in_channels=2048, + in_index=3, + channels=512, + pam_channels=64, + dropout_ratio=0.1, + num_classes=5, # Need to change + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=5, # Need to change + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/deeplabv3_r50-d8.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/deeplabv3_r50-d8.py new file mode 100644 index 0000000..d7a43be --- /dev/null +++ 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/deeplabv3_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='ASPPHead', + in_channels=2048, + in_index=3, + channels=512, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/deeplabv3_unet_s5-d16.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/deeplabv3_unet_s5-d16.py new file mode 100644 index 0000000..0cd2629 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/deeplabv3_unet_s5-d16.py @@ -0,0 +1,50 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='UNet', + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False), + decode_head=dict( + type='ASPPHead', + in_channels=64, + in_index=4, + channels=16, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='slide', crop_size=256, stride=170)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/deeplabv3plus_r50-d8.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/deeplabv3plus_r50-d8.py new file mode 100644 index 0000000..050e39e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/deeplabv3plus_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + 
decode_head=dict( + type='DepthwiseSeparableASPPHead', + in_channels=2048, + in_index=3, + channels=512, + dilations=(1, 12, 24, 36), + c1_in_channels=256, + c1_channels=48, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/dmnet_r50-d8.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/dmnet_r50-d8.py new file mode 100644 index 0000000..d22ba52 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/dmnet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DMHead', + in_channels=2048, + in_index=3, + channels=512, + filter_sizes=(1, 3, 5, 7), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/dnl_r50-d8.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/dnl_r50-d8.py new file mode 100644 index 0000000..edb4c17 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/dnl_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DNLHead', + in_channels=2048, + in_index=3, + channels=512, + dropout_ratio=0.1, + reduction=2, + use_scale=True, + mode='embedded_gaussian', + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + 
train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/dpt_vit-b16.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/dpt_vit-b16.py new file mode 100644 index 0000000..dfd48a9 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/dpt_vit-b16.py @@ -0,0 +1,31 @@ +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='pretrain/vit-b16_p16_224-80ecf9dd.pth', # noqa + backbone=dict( + type='VisionTransformer', + img_size=224, + embed_dims=768, + num_layers=12, + num_heads=12, + out_indices=(2, 5, 8, 11), + final_norm=False, + with_cls_token=True, + output_cls_token=True), + decode_head=dict( + type='DPTHead', + in_channels=(768, 768, 768, 768), + channels=256, + embed_dims=768, + post_process_channels=[96, 192, 384, 768], + num_classes=150, + readout_type='project', + input_transform='multiple_select', + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=None, + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) # yapf: disable diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/emanet_r50-d8.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/emanet_r50-d8.py new file mode 100644 index 0000000..26adcd4 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/emanet_r50-d8.py @@ -0,0 +1,47 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='EMAHead', + in_channels=2048, + in_index=3, + channels=256, + ema_channels=512, + num_bases=64, + num_stages=3, + momentum=0.1, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/encnet_r50-d8.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/encnet_r50-d8.py new file mode 100644 index 0000000..be77712 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/encnet_r50-d8.py @@ -0,0 +1,48 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='EncHead', + in_channels=[512, 1024, 2048], + in_index=(1, 2, 3), + channels=512, + 
num_codes=32, + use_se_loss=True, + add_lateral=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_se_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/erfnet_fcn.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/erfnet_fcn.py new file mode 100644 index 0000000..7f2e9bf --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/erfnet_fcn.py @@ -0,0 +1,32 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='ERFNet', + in_channels=3, + enc_downsample_channels=(16, 64, 128), + enc_stage_non_bottlenecks=(5, 8), + enc_non_bottleneck_dilations=(2, 4, 8, 16), + enc_non_bottleneck_channels=(64, 128), + dec_upsample_channels=(64, 16), + dec_stages_non_bottleneck=(2, 2), + dec_non_bottleneck_channels=(64, 16), + dropout_ratio=0.1, + init_cfg=None), + decode_head=dict( + type='FCNHead', + in_channels=16, + channels=128, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/fast_scnn.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/fast_scnn.py new file mode 100644 index 0000000..8e89d91 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/fast_scnn.py @@ -0,0 +1,57 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True, momentum=0.01) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='FastSCNN', + downsample_dw_channels=(32, 48), + global_in_channels=64, + global_block_channels=(64, 96, 128), + global_block_strides=(2, 2, 1), + global_out_channels=128, + higher_in_channels=64, + lower_in_channels=128, + fusion_out_channels=128, + out_indices=(0, 1, 2), + norm_cfg=norm_cfg, + align_corners=False), + decode_head=dict( + type='DepthwiseSeparableFCNHead', + in_channels=128, + channels=128, + concat_input=False, + num_classes=19, + in_index=-1, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1)), + auxiliary_head=[ + dict( + type='FCNHead', + in_channels=128, + channels=32, + num_convs=1, + num_classes=19, + in_index=-2, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), + dict( + type='FCNHead', + in_channels=64, + channels=32, + num_convs=1, + num_classes=19, + in_index=-3, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)), + ], + # model training and testing 
settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/fastfcn_r50-d32_jpu_psp.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/fastfcn_r50-d32_jpu_psp.py new file mode 100644 index 0000000..9dc8609 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/fastfcn_r50-d32_jpu_psp.py @@ -0,0 +1,53 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + dilations=(1, 1, 2, 4), + strides=(1, 2, 2, 2), + out_indices=(1, 2, 3), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + neck=dict( + type='JPU', + in_channels=(512, 1024, 2048), + mid_channels=512, + start_level=0, + end_level=-1, + dilations=(1, 2, 4, 8), + align_corners=False, + norm_cfg=norm_cfg), + decode_head=dict( + type='PSPHead', + in_channels=2048, + in_index=2, + channels=512, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=1, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/fcn_hr18.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/fcn_hr18.py new file mode 100644 index 0000000..c3e299b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/fcn_hr18.py @@ -0,0 +1,52 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://msra/hrnetv2_w18', + backbone=dict( + type='HRNet', + norm_cfg=norm_cfg, + norm_eval=False, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144)))), + decode_head=dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + channels=sum([18, 36, 72, 144]), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/fcn_r50-d8.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/fcn_r50-d8.py new file mode 100644 index 0000000..5e98f6c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/fcn_r50-d8.py @@ -0,0 
+1,45 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='FCNHead', + in_channels=2048, + in_index=3, + channels=512, + num_convs=2, + concat_input=True, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/fcn_unet_s5-d16.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/fcn_unet_s5-d16.py new file mode 100644 index 0000000..a33e797 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/fcn_unet_s5-d16.py @@ -0,0 +1,51 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='UNet', + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False), + decode_head=dict( + type='FCNHead', + in_channels=64, + in_index=4, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='slide', crop_size=256, stride=170)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/fpn_r50.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/fpn_r50.py new file mode 100644 index 0000000..86ab327 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/fpn_r50.py @@ -0,0 +1,36 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=4), + decode_head=dict( + type='FPNHead', + in_channels=[256, 
256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/gcnet_r50-d8.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/gcnet_r50-d8.py new file mode 100644 index 0000000..3d2ad69 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/gcnet_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='GCHead', + in_channels=2048, + in_index=3, + channels=512, + ratio=1 / 4., + pooling_type='att', + fusion_types=('channel_add', ), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/icnet_r50-d8.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/icnet_r50-d8.py new file mode 100644 index 0000000..d7273cd --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/icnet_r50-d8.py @@ -0,0 +1,74 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='ICNet', + backbone_cfg=dict( + type='ResNetV1c', + in_channels=3, + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + in_channels=3, + layer_channels=(512, 2048), + light_branch_middle_channels=32, + psp_out_channels=512, + out_channels=(64, 256, 256), + norm_cfg=norm_cfg, + align_corners=False, + ), + neck=dict( + type='ICNeck', + in_channels=(64, 256, 256), + out_channels=128, + norm_cfg=norm_cfg, + align_corners=False), + decode_head=dict( + type='FCNHead', + in_channels=128, + channels=128, + num_convs=1, + in_index=2, + dropout_ratio=0, + num_classes=19, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=[ + dict( + type='FCNHead', + in_channels=128, + channels=128, + num_convs=1, + num_classes=19, + in_index=0, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='FCNHead', + in_channels=128, + channels=128, + num_convs=1, + num_classes=19, + in_index=1, + norm_cfg=norm_cfg, + 
concat_input=False, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/isanet_r50-d8.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/isanet_r50-d8.py new file mode 100644 index 0000000..c0221a3 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/isanet_r50-d8.py @@ -0,0 +1,45 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='ISAHead', + in_channels=2048, + in_index=3, + channels=512, + isa_channels=256, + down_factor=(8, 8), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/lraspp_m-v3-d8.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/lraspp_m-v3-d8.py new file mode 100644 index 0000000..9325824 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/lraspp_m-v3-d8.py @@ -0,0 +1,25 @@ +# model settings +norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='MobileNetV3', + arch='large', + out_indices=(1, 3, 16), + norm_cfg=norm_cfg), + decode_head=dict( + type='LRASPPHead', + in_channels=(16, 24, 960), + in_index=(0, 1, 2), + channels=128, + input_transform='multiple_select', + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/nonlocal_r50-d8.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/nonlocal_r50-d8.py new file mode 100644 index 0000000..5674a39 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/nonlocal_r50-d8.py @@ -0,0 +1,46 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='NLHead', + in_channels=2048, + in_index=3, + channels=512, + dropout_ratio=0.1, + reduction=2, + use_scale=True, + 
mode='embedded_gaussian', + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/ocrnet_hr18.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/ocrnet_hr18.py new file mode 100644 index 0000000..c60f62a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/ocrnet_hr18.py @@ -0,0 +1,68 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='CascadeEncoderDecoder', + num_stages=2, + pretrained='open-mmlab://msra/hrnetv2_w18', + backbone=dict( + type='HRNet', + norm_cfg=norm_cfg, + norm_eval=False, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(18, 36)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(18, 36, 72)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(18, 36, 72, 144)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + channels=sum([18, 36, 72, 144]), + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + channels=512, + ocr_channels=256, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/ocrnet_r50-d8.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/ocrnet_r50-d8.py new file mode 100644 index 0000000..615aa3f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/ocrnet_r50-d8.py @@ -0,0 +1,47 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='CascadeEncoderDecoder', + num_stages=2, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=[ + dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + 
in_channels=2048, + in_index=3, + channels=512, + ocr_channels=256, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/pointrend_r50.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/pointrend_r50.py new file mode 100644 index 0000000..9d323db --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/pointrend_r50.py @@ -0,0 +1,56 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='CascadeEncoderDecoder', + num_stages=2, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=4), + decode_head=[ + dict( + type='FPNHead', + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=-1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='PointHead', + in_channels=[256], + in_index=[0], + channels=256, + num_fcs=3, + coarse_pred_each_layer=True, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ], + # model training and testing settings + train_cfg=dict( + num_points=2048, oversample_ratio=3, importance_sample_ratio=0.75), + test_cfg=dict( + mode='whole', + subdivision_steps=2, + subdivision_num_points=8196, + scale_factor=2)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/psanet_r50-d8.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/psanet_r50-d8.py new file mode 100644 index 0000000..689513f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/psanet_r50-d8.py @@ -0,0 +1,49 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='PSAHead', + in_channels=2048, + in_index=3, + channels=512, + mask_size=(97, 97), + psa_type='bi-direction', + compact=False, + shrink_factor=2, + normalization_factor=1.0, + psa_softmax=True, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git 
a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/pspnet_r50-d8.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/pspnet_r50-d8.py new file mode 100644 index 0000000..f451e08 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/pspnet_r50-d8.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='PSPHead', + in_channels=2048, + in_index=3, + channels=512, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/pspnet_unet_s5-d16.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/pspnet_unet_s5-d16.py new file mode 100644 index 0000000..fcff9ec --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/pspnet_unet_s5-d16.py @@ -0,0 +1,50 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='UNet', + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False), + decode_head=dict( + type='PSPHead', + in_channels=64, + in_index=4, + channels=16, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='slide', crop_size=256, stride=170)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/segformer_mit-b0.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/segformer_mit-b0.py new file mode 100644 index 0000000..5b3e073 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/segformer_mit-b0.py @@ -0,0 +1,34 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='MixVisionTransformer', + 
in_channels=3, + embed_dims=32, + num_stages=4, + num_layers=[2, 2, 2, 2], + num_heads=[1, 2, 5, 8], + patch_sizes=[7, 3, 3, 3], + sr_ratios=[8, 4, 2, 1], + out_indices=(0, 1, 2, 3), + mlp_ratio=4, + qkv_bias=True, + drop_rate=0.0, + attn_drop_rate=0.0, + drop_path_rate=0.1), + decode_head=dict( + type='SegformerHead', + in_channels=[32, 64, 160, 256], + in_index=[0, 1, 2, 3], + channels=256, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/segmenter_vit-b16_mask.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/segmenter_vit-b16_mask.py new file mode 100644 index 0000000..622f122 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/segmenter_vit-b16_mask.py @@ -0,0 +1,36 @@ +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segmenter/vit_base_p16_384_20220308-96dfe169.pth' # noqa +# model settings +backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=checkpoint, + backbone=dict( + type='VisionTransformer', + img_size=(512, 512), + patch_size=16, + in_channels=3, + embed_dims=768, + num_layers=12, + num_heads=12, + drop_path_rate=0.1, + attn_drop_rate=0.0, + drop_rate=0.0, + final_norm=True, + norm_cfg=backbone_norm_cfg, + with_cls_token=True, + interpolate_mode='bicubic', + ), + decode_head=dict( + type='SegmenterMaskTransformerHead', + in_channels=768, + channels=768, + num_classes=150, + num_layers=2, + num_heads=12, + embed_dims=768, + dropout_ratio=0.0, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + ), + test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(480, 480)), +) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/setr_mla.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/setr_mla.py new file mode 100644 index 0000000..af4ba24 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/setr_mla.py @@ -0,0 +1,95 @@ +# model settings +backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='pretrain/jx_vit_large_p16_384-b3be5167.pth', + backbone=dict( + type='VisionTransformer', + img_size=(768, 768), + patch_size=16, + in_channels=3, + embed_dims=1024, + num_layers=24, + num_heads=16, + out_indices=(5, 11, 17, 23), + drop_rate=0.1, + norm_cfg=backbone_norm_cfg, + with_cls_token=False, + interpolate_mode='bilinear', + ), + neck=dict( + type='MLANeck', + in_channels=[1024, 1024, 1024, 1024], + out_channels=256, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + ), + decode_head=dict( + type='SETRMLAHead', + in_channels=(256, 256, 256, 256), + channels=512, + in_index=(0, 1, 2, 3), + dropout_ratio=0, + mla_channels=128, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=[ + dict( + type='FCNHead', + in_channels=256, + channels=256, + in_index=0, + dropout_ratio=0, + num_convs=0, + kernel_size=1, + concat_input=False, + num_classes=19, + align_corners=False, + loss_decode=dict( + 
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='FCNHead', + in_channels=256, + channels=256, + in_index=1, + dropout_ratio=0, + num_convs=0, + kernel_size=1, + concat_input=False, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='FCNHead', + in_channels=256, + channels=256, + in_index=2, + dropout_ratio=0, + num_convs=0, + kernel_size=1, + concat_input=False, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='FCNHead', + in_channels=256, + channels=256, + in_index=3, + dropout_ratio=0, + num_convs=0, + kernel_size=1, + concat_input=False, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + ], + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/setr_naive.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/setr_naive.py new file mode 100644 index 0000000..0c330ea --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/setr_naive.py @@ -0,0 +1,80 @@ +# model settings +backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='pretrain/jx_vit_large_p16_384-b3be5167.pth', + backbone=dict( + type='VisionTransformer', + img_size=(768, 768), + patch_size=16, + in_channels=3, + embed_dims=1024, + num_layers=24, + num_heads=16, + out_indices=(9, 14, 19, 23), + drop_rate=0.1, + norm_cfg=backbone_norm_cfg, + with_cls_token=True, + interpolate_mode='bilinear', + ), + decode_head=dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=3, + num_classes=19, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=1, + up_scale=4, + kernel_size=1, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=[ + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=0, + num_classes=19, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=1, + up_scale=4, + kernel_size=1, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=1, + num_classes=19, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=1, + up_scale=4, + kernel_size=1, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=2, + num_classes=19, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=1, + up_scale=4, + kernel_size=1, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)) + ], + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/setr_pup.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/setr_pup.py new file mode 100644 index 0000000..8e5f23b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/setr_pup.py @@ -0,0 +1,80 @@ +# model settings +backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True) +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( 
+ type='EncoderDecoder', + pretrained='pretrain/jx_vit_large_p16_384-b3be5167.pth', + backbone=dict( + type='VisionTransformer', + img_size=(768, 768), + patch_size=16, + in_channels=3, + embed_dims=1024, + num_layers=24, + num_heads=16, + out_indices=(9, 14, 19, 23), + drop_rate=0.1, + norm_cfg=backbone_norm_cfg, + with_cls_token=True, + interpolate_mode='bilinear', + ), + decode_head=dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=3, + num_classes=19, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=4, + up_scale=2, + kernel_size=3, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=[ + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=0, + num_classes=19, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=1, + up_scale=4, + kernel_size=3, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=1, + num_classes=19, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=1, + up_scale=4, + kernel_size=3, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=2, + num_classes=19, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=1, + up_scale=4, + kernel_size=3, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + ], + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/stdc.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/stdc.py new file mode 100644 index 0000000..341a4ec --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/stdc.py @@ -0,0 +1,83 @@ +norm_cfg = dict(type='BN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='STDCContextPathNet', + backbone_cfg=dict( + type='STDCNet', + stdc_type='STDCNet1', + in_channels=3, + channels=(32, 64, 256, 512, 1024), + bottleneck_type='cat', + num_convs=4, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + with_final_conv=False), + last_in_channels=(1024, 512), + out_channels=128, + ffm_cfg=dict(in_channels=384, out_channels=256, scale_factor=4)), + decode_head=dict( + type='FCNHead', + in_channels=256, + channels=256, + num_convs=1, + num_classes=19, + in_index=3, + concat_input=False, + dropout_ratio=0.1, + norm_cfg=norm_cfg, + align_corners=True, + sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000), + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=[ + dict( + type='FCNHead', + in_channels=128, + channels=64, + num_convs=1, + num_classes=19, + in_index=2, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000), + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='FCNHead', + in_channels=128, + channels=64, + num_convs=1, + num_classes=19, + in_index=1, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=False, + sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000), + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='STDCHead', + in_channels=256, + channels=64, + 
num_convs=1, + num_classes=2, + boundary_threshold=0.1, + in_index=0, + norm_cfg=norm_cfg, + concat_input=False, + align_corners=True, + loss_decode=[ + dict( + type='CrossEntropyLoss', + loss_name='loss_ce', + use_sigmoid=True, + loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=1.0) + ]), + ], + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/twins_pcpvt-s_fpn.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/twins_pcpvt-s_fpn.py new file mode 100644 index 0000000..0f4488a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/twins_pcpvt-s_fpn.py @@ -0,0 +1,45 @@ +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_small_20220308-e638c41c.pth' # noqa + +# model settings +backbone_norm_cfg = dict(type='LN') +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='PCPVT', + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + in_channels=3, + embed_dims=[64, 128, 320, 512], + num_heads=[1, 2, 5, 8], + patch_sizes=[4, 2, 2, 2], + strides=[4, 2, 2, 2], + mlp_ratios=[8, 8, 4, 4], + out_indices=(0, 1, 2, 3), + qkv_bias=True, + norm_cfg=backbone_norm_cfg, + depths=[3, 4, 6, 3], + sr_ratios=[8, 4, 2, 1], + norm_after_stage=False, + drop_rate=0.0, + attn_drop_rate=0., + drop_path_rate=0.2), + neck=dict( + type='FPN', + in_channels=[64, 128, 320, 512], + out_channels=256, + num_outs=4), + decode_head=dict( + type='FPNHead', + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/twins_pcpvt-s_upernet.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/twins_pcpvt-s_upernet.py new file mode 100644 index 0000000..14a74b9 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/twins_pcpvt-s_upernet.py @@ -0,0 +1,53 @@ +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_small_20220308-e638c41c.pth' # noqa + +# model settings +backbone_norm_cfg = dict(type='LN') +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='PCPVT', + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + in_channels=3, + embed_dims=[64, 128, 320, 512], + num_heads=[1, 2, 5, 8], + patch_sizes=[4, 2, 2, 2], + strides=[4, 2, 2, 2], + mlp_ratios=[8, 8, 4, 4], + out_indices=(0, 1, 2, 3), + qkv_bias=True, + norm_cfg=backbone_norm_cfg, + depths=[3, 4, 6, 3], + sr_ratios=[8, 4, 2, 1], + norm_after_stage=False, + drop_rate=0.0, + attn_drop_rate=0., + drop_path_rate=0.2), + decode_head=dict( + type='UPerHead', + in_channels=[64, 128, 320, 512], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=320, + in_index=2, + channels=256, + num_convs=1, + 
concat_input=False, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/upernet_beit.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/upernet_beit.py new file mode 100644 index 0000000..9c5bfa3 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/upernet_beit.py @@ -0,0 +1,50 @@ +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='BEiT', + img_size=(640, 640), + patch_size=16, + in_channels=3, + embed_dims=768, + num_layers=12, + num_heads=12, + mlp_ratio=4, + out_indices=(3, 5, 7, 11), + qv_bias=True, + attn_drop_rate=0.0, + drop_path_rate=0.1, + norm_cfg=dict(type='LN', eps=1e-6), + act_cfg=dict(type='GELU'), + norm_eval=False, + init_values=0.1), + neck=dict(type='Feature2Pyramid', embed_dim=768, rescales=[4, 2, 1, 0.5]), + decode_head=dict( + type='UPerHead', + in_channels=[768, 768, 768, 768], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=768, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=768, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/upernet_convnext.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/upernet_convnext.py new file mode 100644 index 0000000..36b882f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/upernet_convnext.py @@ -0,0 +1,44 @@ +norm_cfg = dict(type='SyncBN', requires_grad=True) +custom_imports = dict(imports='mmcls.models', allow_failed_imports=False) +checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-base_3rdparty_32xb128-noema_in1k_20220301-2a0ee547.pth' # noqa +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='mmcls.ConvNeXt', + arch='base', + out_indices=[0, 1, 2, 3], + drop_path_rate=0.4, + layer_scale_init_value=1.0, + gap_before_final_norm=False, + init_cfg=dict( + type='Pretrained', checkpoint=checkpoint_file, + prefix='backbone.')), + decode_head=dict( + type='UPerHead', + in_channels=[128, 256, 512, 1024], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=384, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git 
a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/upernet_mae.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/upernet_mae.py new file mode 100644 index 0000000..1e0da70 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/upernet_mae.py @@ -0,0 +1,49 @@ +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + type='MAE', + img_size=(640, 640), + patch_size=16, + in_channels=3, + embed_dims=768, + num_layers=12, + num_heads=12, + mlp_ratio=4, + out_indices=(3, 5, 7, 11), + attn_drop_rate=0.0, + drop_path_rate=0.1, + norm_cfg=dict(type='LN', eps=1e-6), + act_cfg=dict(type='GELU'), + norm_eval=False, + init_values=0.1), + neck=dict(type='Feature2Pyramid', embed_dim=768, rescales=[4, 2, 1, 0.5]), + decode_head=dict( + type='UPerHead', + in_channels=[384, 384, 384, 384], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=384, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/upernet_r50.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/upernet_r50.py new file mode 100644 index 0000000..1097496 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/upernet_r50.py @@ -0,0 +1,44 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='UPerHead', + in_channels=[256, 512, 1024, 2048], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/upernet_swin.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/upernet_swin.py new file mode 100644 index 0000000..71b5162 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/upernet_swin.py @@ -0,0 +1,54 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +backbone_norm_cfg = dict(type='LN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained=None, + backbone=dict( + 
type='SwinTransformer', + pretrain_img_size=224, + embed_dims=96, + patch_size=4, + window_size=7, + mlp_ratio=4, + depths=[2, 2, 6, 2], + num_heads=[3, 6, 12, 24], + strides=(4, 2, 2, 2), + out_indices=(0, 1, 2, 3), + qkv_bias=True, + qk_scale=None, + patch_norm=True, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.3, + use_abs_pos_embed=False, + act_cfg=dict(type='GELU'), + norm_cfg=backbone_norm_cfg), + decode_head=dict( + type='UPerHead', + in_channels=[96, 192, 384, 768], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=384, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/upernet_vit-b16_ln_mln.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/upernet_vit-b16_ln_mln.py new file mode 100644 index 0000000..cd6587d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/models/upernet_vit-b16_ln_mln.py @@ -0,0 +1,57 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='pretrain/jx_vit_base_p16_224-80ecf9dd.pth', + backbone=dict( + type='VisionTransformer', + img_size=(512, 512), + patch_size=16, + in_channels=3, + embed_dims=768, + num_layers=12, + num_heads=12, + mlp_ratio=4, + out_indices=(2, 5, 8, 11), + qkv_bias=True, + drop_rate=0.0, + attn_drop_rate=0.0, + drop_path_rate=0.0, + with_cls_token=True, + norm_cfg=dict(type='LN', eps=1e-6), + act_cfg=dict(type='GELU'), + norm_eval=False, + interpolate_mode='bicubic'), + neck=dict( + type='MultiLevelNeck', + in_channels=[768, 768, 768, 768], + out_channels=768, + scales=[4, 2, 1, 0.5]), + decode_head=dict( + type='UPerHead', + in_channels=[768, 768, 768, 768], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=768, + in_index=3, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) # yapf: disable diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/schedules/schedule_160k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/schedules/schedule_160k.py new file mode 100644 index 0000000..39630f2 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/schedules/schedule_160k.py @@ -0,0 +1,9 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +runner = dict(type='IterBasedRunner', 
max_iters=160000)
+checkpoint_config = dict(by_epoch=False, interval=16000)
+evaluation = dict(interval=16000, metric='mIoU', pre_eval=True)
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/schedules/schedule_20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/schedules/schedule_20k.py
new file mode 100644
index 0000000..73c7021
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/schedules/schedule_20k.py
@@ -0,0 +1,9 @@
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optimizer_config = dict()
+# learning policy
+lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
+# runtime settings
+runner = dict(type='IterBasedRunner', max_iters=20000)
+checkpoint_config = dict(by_epoch=False, interval=2000)
+evaluation = dict(interval=2000, metric='mIoU', pre_eval=True)
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/schedules/schedule_320k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/schedules/schedule_320k.py
new file mode 100644
index 0000000..a0b2306
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/schedules/schedule_320k.py
@@ -0,0 +1,9 @@
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optimizer_config = dict()
+# learning policy
+lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
+# runtime settings
+runner = dict(type='IterBasedRunner', max_iters=320000)
+checkpoint_config = dict(by_epoch=False, interval=32000)
+evaluation = dict(interval=32000, metric='mIoU')
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/schedules/schedule_40k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/schedules/schedule_40k.py
new file mode 100644
index 0000000..9594b6f
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/schedules/schedule_40k.py
@@ -0,0 +1,35 @@
+# optimizer
+optimizer = dict(
+    type='AdamW',
+    lr=3e-05,
+    betas=(0.9, 0.999),
+    weight_decay=0.01,
+    paramwise_cfg=dict(
+        custom_keys=dict(
+            pos_block=dict(decay_mult=0.0),  # ??? Why is it like this?
+            norm=dict(decay_mult=0.0),  # ??? Why is it like this?
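+            # Note on the two settings above: normalization layers and
+            # position-embedding blocks are conventionally excluded from
+            # weight decay when training transformer backbones with AdamW.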
+            head=dict(lr_mult=10.0))))  # lr of head = 10 times backbone
+optimizer_config = dict()
+# learning policy
+lr_config = dict(
+    policy='poly',
+    warmup='linear',
+    warmup_iters=1500,
+    warmup_ratio=1e-06,
+    power=1.0,
+    min_lr=0.0,
+    by_epoch=False)
+# runtime settings
+runner = dict(type='EpochBasedRunner', max_epochs=200)
+checkpoint_config = dict(by_epoch=True, interval=1)
+evaluation = dict(by_epoch=True, interval=1, metric='mIoU')
+#evaluation = dict(interval=1, metric='mIoU')
+log_config = dict(
+    interval=1000,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        dict(
+            type='WandbLoggerHook',
+            init_kwargs=dict(project='Oil_Spill', name='V7_REV5_Compressdata'))
+    ])
+auto_resume = False
\ No newline at end of file
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/schedules/schedule_80k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/schedules/schedule_80k.py
new file mode 100644
index 0000000..8365a87
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/_base_/schedules/schedule_80k.py
@@ -0,0 +1,9 @@
+# optimizer
+optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+optimizer_config = dict()
+# learning policy
+lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False)
+# runtime settings
+runner = dict(type='IterBasedRunner', max_iters=80000)
+checkpoint_config = dict(by_epoch=False, interval=8000)
+evaluation = dict(interval=8000, metric='mIoU', pre_eval=True)
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/README.md
new file mode 100644
index 0000000..ba4cfe2
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/README.md
@@ -0,0 +1,68 @@
+# ANN
+
+[Asymmetric Non-local Neural Networks for Semantic Segmentation](https://arxiv.org/abs/1908.07678)
+
+## Introduction
+
+[Official Repo](https://github.com/MendelXu/ANN)
+
+[Code Snippet](https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ann_head.py#L185)
+
+## Abstract
+
+The non-local module works as a particularly useful technique for semantic segmentation while criticized for its prohibitive computation and GPU memory occupation. In this paper, we present Asymmetric Non-local Neural Network to semantic segmentation, which has two prominent components: Asymmetric Pyramid Non-local Block (APNB) and Asymmetric Fusion Non-local Block (AFNB). APNB leverages a pyramid sampling module into the non-local block to largely reduce the computation and memory consumption without sacrificing the performance. AFNB is adapted from APNB to fuse the features of different levels under a sufficient consideration of long range dependencies and thus considerably improves the performance. Extensive experiments on semantic segmentation benchmarks demonstrate the effectiveness and efficiency of our work. In particular, we report the state-of-the-art performance of 81.3 mIoU on the Cityscapes test set. For a 256x128 input, APNB is around 6 times faster than a non-local block on GPU while 28 times smaller in GPU running memory occupation. Code is available at: [this https URL](https://github.com/MendelXu/ANN).
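+
+As an illustration of the asymmetric idea above, the following is a minimal PyTorch sketch of an APNB-style block: the query keeps full spatial resolution, while key and value are shrunk to a fixed set of anchors by pyramid pooling. It is a sketch only — class and parameter names are ours, and the pooling scales (1, 3, 6, 8) are assumed from the paper's defaults; the implementation actually vendored here is in `mmseg/models/decode_heads/ann_head.py` (linked above).
+
+```python
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class APNBSketch(nn.Module):
+    """Asymmetric Pyramid Non-local Block, schematically.
+
+    Attention cost drops from O((H*W)^2) to O(H*W * S), where
+    S = sum(s * s for s in scales) anchors (110 for scales 1, 3, 6, 8).
+    """
+
+    def __init__(self, channels, key_channels=64, scales=(1, 3, 6, 8)):
+        super().__init__()
+        self.scales = scales
+        self.query = nn.Conv2d(channels, key_channels, 1)
+        self.key = nn.Conv2d(channels, key_channels, 1)
+        self.value = nn.Conv2d(channels, key_channels, 1)
+        self.out = nn.Conv2d(key_channels, channels, 1)
+
+    def _sample(self, x):
+        # pyramid pooling: (B, C, S) with S anchors instead of H*W positions
+        return torch.cat(
+            [F.adaptive_avg_pool2d(x, s).flatten(2) for s in self.scales],
+            dim=2)
+
+    def forward(self, x):
+        b, _, h, w = x.shape
+        q = self.query(x).flatten(2).transpose(1, 2)     # (B, HW, Ck)
+        k = self._sample(self.key(x))                    # (B, Ck, S)
+        v = self._sample(self.value(x)).transpose(1, 2)  # (B, S, Ck)
+        attn = torch.softmax(q @ k, dim=-1)              # (B, HW, S)
+        ctx = (attn @ v).transpose(1, 2).reshape(b, -1, h, w)
+        return x + self.out(ctx)  # residual connection around the block
+
+
+# e.g. APNBSketch(2048)(torch.randn(1, 2048, 64, 128)) keeps the input shape
+```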
+ +## Citation + +```bibtex +@inproceedings{zhu2019asymmetric, + title={Asymmetric non-local neural networks for semantic segmentation}, + author={Zhu, Zhen and Xu, Mengde and Bai, Song and Huang, Tengteng and Bai, Xiang}, + booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision}, + pages={593--602}, + year={2019} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| ANN | R-50-D8 | 512x1024 | 40000 | 6 | 3.71 | 77.40 | 78.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r50-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x1024_40k_cityscapes/ann_r50-d8_512x1024_40k_cityscapes_20200605_095211-049fc292.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x1024_40k_cityscapes/ann_r50-d8_512x1024_40k_cityscapes_20200605_095211.log.json) | +| ANN | R-101-D8 | 512x1024 | 40000 | 9.5 | 2.55 | 76.55 | 78.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r101-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x1024_40k_cityscapes/ann_r101-d8_512x1024_40k_cityscapes_20200605_095243-adf6eece.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x1024_40k_cityscapes/ann_r101-d8_512x1024_40k_cityscapes_20200605_095243.log.json) | +| ANN | R-50-D8 | 769x769 | 40000 | 6.8 | 1.70 | 78.89 | 80.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r50-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_769x769_40k_cityscapes/ann_r50-d8_769x769_40k_cityscapes_20200530_025712-2b46b04d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_769x769_40k_cityscapes/ann_r50-d8_769x769_40k_cityscapes_20200530_025712.log.json) | +| ANN | R-101-D8 | 769x769 | 40000 | 10.7 | 1.15 | 79.32 | 80.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r101-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_769x769_40k_cityscapes/ann_r101-d8_769x769_40k_cityscapes_20200530_025720-059bff28.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_769x769_40k_cityscapes/ann_r101-d8_769x769_40k_cityscapes_20200530_025720.log.json) | +| ANN | R-50-D8 | 512x1024 | 80000 | - | - | 77.34 | 78.65 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r50-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x1024_80k_cityscapes/ann_r50-d8_512x1024_80k_cityscapes_20200607_101911-5a9ad545.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x1024_80k_cityscapes/ann_r50-d8_512x1024_80k_cityscapes_20200607_101911.log.json) | +| ANN | R-101-D8 | 512x1024 | 80000 | - | - | 77.14 | 78.81 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x1024_80k_cityscapes/ann_r101-d8_512x1024_80k_cityscapes_20200607_013728-aceccc6e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x1024_80k_cityscapes/ann_r101-d8_512x1024_80k_cityscapes_20200607_013728.log.json) | +| ANN | R-50-D8 | 769x769 | 80000 | - | - | 78.88 | 80.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r50-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_769x769_80k_cityscapes/ann_r50-d8_769x769_80k_cityscapes_20200607_044426-cc7ff323.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_769x769_80k_cityscapes/ann_r50-d8_769x769_80k_cityscapes_20200607_044426.log.json) | +| ANN | R-101-D8 | 769x769 | 80000 | - | - | 78.80 | 80.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r101-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_769x769_80k_cityscapes/ann_r101-d8_769x769_80k_cityscapes_20200607_013713-a9d4be8d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_769x769_80k_cityscapes/ann_r101-d8_769x769_80k_cityscapes_20200607_013713.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| ANN | R-50-D8 | 512x512 | 80000 | 9.1 | 21.01 | 41.01 | 42.30 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r50-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_80k_ade20k/ann_r50-d8_512x512_80k_ade20k_20200615_014818-26f75e11.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_80k_ade20k/ann_r50-d8_512x512_80k_ade20k_20200615_014818.log.json) | +| ANN | R-101-D8 | 512x512 | 80000 | 12.5 | 14.12 | 42.94 | 44.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r101-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_80k_ade20k/ann_r101-d8_512x512_80k_ade20k_20200615_014818-c0153543.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_80k_ade20k/ann_r101-d8_512x512_80k_ade20k_20200615_014818.log.json) | +| ANN | R-50-D8 | 512x512 | 160000 | - | - | 41.74 | 42.62 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r50-d8_512x512_160k_ade20k.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_160k_ade20k/ann_r50-d8_512x512_160k_ade20k_20200615_231733-892247bc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_160k_ade20k/ann_r50-d8_512x512_160k_ade20k_20200615_231733.log.json) | +| ANN | R-101-D8 | 512x512 | 160000 | - | - | 42.94 | 44.06 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_160k_ade20k/ann_r101-d8_512x512_160k_ade20k_20200615_231733-955eb1ec.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_160k_ade20k/ann_r101-d8_512x512_160k_ade20k_20200615_231733.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| ANN | R-50-D8 | 512x512 | 20000 | 6 | 20.92 | 74.86 | 76.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r50-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_20k_voc12aug/ann_r50-d8_512x512_20k_voc12aug_20200617_222246-dfcb1c62.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_20k_voc12aug/ann_r50-d8_512x512_20k_voc12aug_20200617_222246.log.json) | +| ANN | R-101-D8 | 512x512 | 20000 | 9.5 | 13.94 | 77.47 | 78.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r101-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_20k_voc12aug/ann_r101-d8_512x512_20k_voc12aug_20200617_222246-2fad0042.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_20k_voc12aug/ann_r101-d8_512x512_20k_voc12aug_20200617_222246.log.json) | +| ANN | R-50-D8 | 512x512 | 40000 | - | - | 76.56 | 77.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r50-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_40k_voc12aug/ann_r50-d8_512x512_40k_voc12aug_20200613_231314-b5dac322.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_40k_voc12aug/ann_r50-d8_512x512_40k_voc12aug_20200613_231314.log.json) | +| ANN | R-101-D8 | 512x512 | 40000 | - | - | 76.70 | 78.06 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann/ann_r101-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_40k_voc12aug/ann_r101-d8_512x512_40k_voc12aug_20200613_231314-bd205bbe.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_40k_voc12aug/ann_r101-d8_512x512_40k_voc12aug_20200613_231314.log.json) | diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann.yml 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann.yml new file mode 100644 index 0000000..ff6bea6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann.yml @@ -0,0 +1,305 @@ +Collections: +- Name: ANN + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + Paper: + URL: https://arxiv.org/abs/1908.07678 + Title: Asymmetric Non-local Neural Networks for Semantic Segmentation + README: configs/ann/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ann_head.py#L185 + Version: v0.17.0 + Converted From: + Code: https://github.com/MendelXu/ANN +Models: +- Name: ann_r50-d8_512x1024_40k_cityscapes + In Collection: ANN + Metadata: + backbone: R-50-D8 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 269.54 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 6.0 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.4 + mIoU(ms+flip): 78.57 + Config: configs/ann/ann_r50-d8_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x1024_40k_cityscapes/ann_r50-d8_512x1024_40k_cityscapes_20200605_095211-049fc292.pth +- Name: ann_r101-d8_512x1024_40k_cityscapes + In Collection: ANN + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 392.16 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 9.5 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.55 + mIoU(ms+flip): 78.85 + Config: configs/ann/ann_r101-d8_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x1024_40k_cityscapes/ann_r101-d8_512x1024_40k_cityscapes_20200605_095243-adf6eece.pth +- Name: ann_r50-d8_769x769_40k_cityscapes + In Collection: ANN + Metadata: + backbone: R-50-D8 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - value: 588.24 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 6.8 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.89 + mIoU(ms+flip): 80.46 + Config: configs/ann/ann_r50-d8_769x769_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_769x769_40k_cityscapes/ann_r50-d8_769x769_40k_cityscapes_20200530_025712-2b46b04d.pth +- Name: ann_r101-d8_769x769_40k_cityscapes + In Collection: ANN + Metadata: + backbone: R-101-D8 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - value: 869.57 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 10.7 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.32 + mIoU(ms+flip): 80.94 + Config: configs/ann/ann_r101-d8_769x769_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_769x769_40k_cityscapes/ann_r101-d8_769x769_40k_cityscapes_20200530_025720-059bff28.pth +- Name: ann_r50-d8_512x1024_80k_cityscapes + In Collection: ANN + Metadata: + backbone: R-50-D8 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.34 + mIoU(ms+flip): 78.65 + Config: configs/ann/ann_r50-d8_512x1024_80k_cityscapes.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x1024_80k_cityscapes/ann_r50-d8_512x1024_80k_cityscapes_20200607_101911-5a9ad545.pth +- Name: ann_r101-d8_512x1024_80k_cityscapes + In Collection: ANN + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.14 + mIoU(ms+flip): 78.81 + Config: configs/ann/ann_r101-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x1024_80k_cityscapes/ann_r101-d8_512x1024_80k_cityscapes_20200607_013728-aceccc6e.pth +- Name: ann_r50-d8_769x769_80k_cityscapes + In Collection: ANN + Metadata: + backbone: R-50-D8 + crop size: (769,769) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.88 + mIoU(ms+flip): 80.57 + Config: configs/ann/ann_r50-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_769x769_80k_cityscapes/ann_r50-d8_769x769_80k_cityscapes_20200607_044426-cc7ff323.pth +- Name: ann_r101-d8_769x769_80k_cityscapes + In Collection: ANN + Metadata: + backbone: R-101-D8 + crop size: (769,769) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.8 + mIoU(ms+flip): 80.34 + Config: configs/ann/ann_r101-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_769x769_80k_cityscapes/ann_r101-d8_769x769_80k_cityscapes_20200607_013713-a9d4be8d.pth +- Name: ann_r50-d8_512x512_80k_ade20k + In Collection: ANN + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 47.6 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.1 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.01 + mIoU(ms+flip): 42.3 + Config: configs/ann/ann_r50-d8_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_80k_ade20k/ann_r50-d8_512x512_80k_ade20k_20200615_014818-26f75e11.pth +- Name: ann_r101-d8_512x512_80k_ade20k + In Collection: ANN + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 70.82 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 12.5 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.94 + mIoU(ms+flip): 44.18 + Config: configs/ann/ann_r101-d8_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_80k_ade20k/ann_r101-d8_512x512_80k_ade20k_20200615_014818-c0153543.pth +- Name: ann_r50-d8_512x512_160k_ade20k + In Collection: ANN + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.74 + mIoU(ms+flip): 42.62 + Config: configs/ann/ann_r50-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_160k_ade20k/ann_r50-d8_512x512_160k_ade20k_20200615_231733-892247bc.pth +- Name: ann_r101-d8_512x512_160k_ade20k + In Collection: ANN + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.94 + mIoU(ms+flip): 44.06 + Config: 
configs/ann/ann_r101-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_160k_ade20k/ann_r101-d8_512x512_160k_ade20k_20200615_231733-955eb1ec.pth +- Name: ann_r50-d8_512x512_20k_voc12aug + In Collection: ANN + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 47.8 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 6.0 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 74.86 + mIoU(ms+flip): 76.13 + Config: configs/ann/ann_r50-d8_512x512_20k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_20k_voc12aug/ann_r50-d8_512x512_20k_voc12aug_20200617_222246-dfcb1c62.pth +- Name: ann_r101-d8_512x512_20k_voc12aug + In Collection: ANN + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 71.74 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.5 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.47 + mIoU(ms+flip): 78.7 + Config: configs/ann/ann_r101-d8_512x512_20k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_20k_voc12aug/ann_r101-d8_512x512_20k_voc12aug_20200617_222246-2fad0042.pth +- Name: ann_r50-d8_512x512_40k_voc12aug + In Collection: ANN + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.56 + mIoU(ms+flip): 77.51 + Config: configs/ann/ann_r50-d8_512x512_40k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r50-d8_512x512_40k_voc12aug/ann_r50-d8_512x512_40k_voc12aug_20200613_231314-b5dac322.pth +- Name: ann_r101-d8_512x512_40k_voc12aug + In Collection: ANN + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.7 + mIoU(ms+flip): 78.06 + Config: configs/ann/ann_r101-d8_512x512_40k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ann/ann_r101-d8_512x512_40k_voc12aug/ann_r101-d8_512x512_40k_voc12aug_20200613_231314-bd205bbe.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r101-d8_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..d494e07 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './ann_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r101-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..1eeff0b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './ann_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git 
a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r101-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..9e43af5 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './ann_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r101-d8_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000..d854f2e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './ann_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r101-d8_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000..893c53b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './ann_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r101-d8_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..a64dac6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './ann_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r101-d8_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000..5950824 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './ann_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r101-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..a9c712d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './ann_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r50-d8_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..00b2594 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', '../_base_/datasets/cityscapes.py', + 
'../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r50-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..ef7b369 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r50-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..ca6bb24 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r50-d8_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000..071f190 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', '../_base_/datasets/pascal_voc12_aug.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r50-d8_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000..82a1c93 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', '../_base_/datasets/pascal_voc12_aug.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r50-d8_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..5e04aa7 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r50-d8_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000..4912bdb --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', + 
'../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r50-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..d1cc072 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ann/ann_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/ann_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/README.md new file mode 100644 index 0000000..f101a02 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/README.md @@ -0,0 +1,59 @@ +# APCNet + +[Adaptive Pyramid Context Network for Semantic Segmentation](https://openaccess.thecvf.com/content_CVPR_2019/html/He_Adaptive_Pyramid_Context_Network_for_Semantic_Segmentation_CVPR_2019_paper.html) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Recent studies witnessed that context features can significantly improve the performance of deep semantic segmentation networks. Current context based segmentation methods differ with each other in how to construct context features and perform differently in practice. This paper firstly introduces three desirable properties of context features in segmentation task. Specially, we find that Global-guided Local Affinity (GLA) can play a vital role in constructing effective context features, while this property has been largely ignored in previous works. Based on this analysis, this paper proposes Adaptive Pyramid Context Network (APCNet)for semantic segmentation. APCNet adaptively constructs multi-scale contextual representations with multiple welldesigned Adaptive Context Modules (ACMs). Specifically, each ACM leverages a global image representation as a guidance to estimate the local affinity coefficients for each sub-region, and then calculates a context vector with these affinities. We empirically evaluate our APCNet on three semantic segmentation and scene parsing datasets, including PASCAL VOC 2012, Pascal-Context, and ADE20K dataset. Experimental results show that APCNet achieves state-ofthe-art performance on all three benchmarks, and obtains a new record 84.2% on PASCAL VOC 2012 test set without MS COCO pre-trained and any post-processing. + + + +
+ +
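+To make the mechanism above concrete, here is a minimal, self-contained sketch of a single Adaptive Context Module branch. It only illustrates the Global-guided Local Affinity idea summarized in the abstract and is not the mmsegmentation implementation (see `mmseg/models/decode_heads/apc_head.py` for that); the sigmoid normalization and the exact layer layout are simplifying assumptions.
+
+```python
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class AdaptiveContextModule(nn.Module):
+    """One ACM branch: Global-guided Local Affinity (illustrative only)."""
+
+    def __init__(self, in_channels, channels, pool_scale):
+        super().__init__()
+        self.pool_scale = pool_scale
+        # Global image representation used as guidance.
+        self.global_conv = nn.Conv2d(in_channels, channels, 1)
+        self.input_conv = nn.Conv2d(in_channels, channels, 1)
+        # Predicts pool_scale**2 affinity logits for every spatial position.
+        self.affinity_conv = nn.Conv2d(channels, pool_scale ** 2, 1)
+        # Projects the adaptively pooled sub-region features.
+        self.pooled_conv = nn.Conv2d(in_channels, channels, 1)
+
+    def forward(self, x):
+        b, _, h, w = x.shape
+        s = self.pool_scale
+        # Local features guided by the global representation (broadcast add).
+        g = self.global_conv(F.adaptive_avg_pool2d(x, 1))
+        local = self.input_conv(x) + g
+        # Affinity between each position and each of the s*s sub-regions.
+        affinity = torch.sigmoid(self.affinity_conv(local)).flatten(2)  # (b, s*s, h*w)
+        # Sub-region representations from adaptive average pooling.
+        pooled = self.pooled_conv(F.adaptive_avg_pool2d(x, s)).flatten(2)  # (b, c, s*s)
+        # Context vector per position: affinity-weighted sum of sub-regions.
+        context = torch.bmm(pooled, affinity)  # (b, c, h*w)
+        return context.view(b, -1, h, w)
+
+
+acm = AdaptiveContextModule(512, 256, pool_scale=3)
+print(acm(torch.randn(2, 512, 64, 64)).shape)  # torch.Size([2, 256, 64, 64])
+```
+
+In the full model, several such branches with different pool scales form the adaptive pyramid, and their outputs are fused into the multi-scale contextual representation.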
+ +## Citation + +```bibtex +@InProceedings{He_2019_CVPR, +author = {He, Junjun and Deng, Zhongying and Zhou, Lei and Wang, Yali and Qiao, Yu}, +title = {Adaptive Pyramid Context Network for Semantic Segmentation}, +booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, +month = {June}, +year = {2019} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| APCNet | R-50-D8 | 512x1024 | 40000 | 7.7 | 3.57 | 78.02 | 79.26 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet/apcnet_r50-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x1024_40k_cityscapes/apcnet_r50-d8_512x1024_40k_cityscapes_20201214_115717-5e88fa33.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x1024_40k_cityscapes/apcnet_r50-d8_512x1024_40k_cityscapes-20201214_115717.log.json) | +| APCNet | R-101-D8 | 512x1024 | 40000 | 11.2 | 2.15 | 79.08 | 80.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet/apcnet_r101-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x1024_40k_cityscapes/apcnet_r101-d8_512x1024_40k_cityscapes_20201214_115716-abc9d111.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x1024_40k_cityscapes/apcnet_r101-d8_512x1024_40k_cityscapes-20201214_115716.log.json) | +| APCNet | R-50-D8 | 769x769 | 40000 | 8.7 | 1.52 | 77.89 | 79.75 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet/apcnet_r50-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_769x769_40k_cityscapes/apcnet_r50-d8_769x769_40k_cityscapes_20201214_115717-2a2628d7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_769x769_40k_cityscapes/apcnet_r50-d8_769x769_40k_cityscapes-20201214_115717.log.json) | +| APCNet | R-101-D8 | 769x769 | 40000 | 12.7 | 1.03 | 77.96 | 79.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet/apcnet_r101-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_769x769_40k_cityscapes/apcnet_r101-d8_769x769_40k_cityscapes_20201214_115718-b650de90.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_769x769_40k_cityscapes/apcnet_r101-d8_769x769_40k_cityscapes-20201214_115718.log.json) | +| APCNet | R-50-D8 | 512x1024 | 80000 | - | - | 78.96 | 79.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet/apcnet_r50-d8_512x1024_80k_cityscapes.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x1024_80k_cityscapes/apcnet_r50-d8_512x1024_80k_cityscapes_20201214_115716-987f51e3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x1024_80k_cityscapes/apcnet_r50-d8_512x1024_80k_cityscapes-20201214_115716.log.json) | +| APCNet | R-101-D8 | 512x1024 | 80000 | - | - | 79.64 | 80.61 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet/apcnet_r101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x1024_80k_cityscapes/apcnet_r101-d8_512x1024_80k_cityscapes_20201214_115705-b1ff208a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x1024_80k_cityscapes/apcnet_r101-d8_512x1024_80k_cityscapes-20201214_115705.log.json) | +| APCNet | R-50-D8 | 769x769 | 80000 | - | - | 78.79 | 80.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet/apcnet_r50-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_769x769_80k_cityscapes/apcnet_r50-d8_769x769_80k_cityscapes_20201214_115718-7ea9fa12.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_769x769_80k_cityscapes/apcnet_r50-d8_769x769_80k_cityscapes-20201214_115718.log.json) | +| APCNet | R-101-D8 | 769x769 | 80000 | - | - | 78.45 | 79.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet/apcnet_r101-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_769x769_80k_cityscapes/apcnet_r101-d8_769x769_80k_cityscapes_20201214_115716-a7fbc2ab.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_769x769_80k_cityscapes/apcnet_r101-d8_769x769_80k_cityscapes-20201214_115716.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| APCNet | R-50-D8 | 512x512 | 80000 | 10.1 | 19.61 | 42.20 | 43.30 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet/apcnet_r50-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x512_80k_ade20k/apcnet_r50-d8_512x512_80k_ade20k_20201214_115705-a8626293.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x512_80k_ade20k/apcnet_r50-d8_512x512_80k_ade20k-20201214_115705.log.json) | +| APCNet | R-101-D8 | 512x512 | 80000 | 13.6 | 13.10 | 45.54 | 46.65 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet/apcnet_r101-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x512_80k_ade20k/apcnet_r101-d8_512x512_80k_ade20k_20201214_115704-c656c3fb.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x512_80k_ade20k/apcnet_r101-d8_512x512_80k_ade20k-20201214_115704.log.json) | +| APCNet | R-50-D8 | 512x512 | 160000 | - | - | 43.40 | 43.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet/apcnet_r50-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x512_160k_ade20k/apcnet_r50-d8_512x512_160k_ade20k_20201214_115706-25fb92c2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x512_160k_ade20k/apcnet_r50-d8_512x512_160k_ade20k-20201214_115706.log.json) | +| APCNet | R-101-D8 | 512x512 | 160000 | - | - | 45.41 | 46.63 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet/apcnet_r101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x512_160k_ade20k/apcnet_r101-d8_512x512_160k_ade20k_20201214_115705-73f9a8d7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x512_160k_ade20k/apcnet_r101-d8_512x512_160k_ade20k-20201214_115705.log.json) | diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet.yml new file mode 100644 index 0000000..7a453a3 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet.yml @@ -0,0 +1,232 @@ +Collections: +- Name: APCNet + Metadata: + Training Data: + - Cityscapes + - ADE20K + Paper: + URL: https://openaccess.thecvf.com/content_CVPR_2019/html/He_Adaptive_Pyramid_Context_Network_for_Semantic_Segmentation_CVPR_2019_paper.html + Title: Adaptive Pyramid Context Network for Semantic Segmentation + README: configs/apcnet/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/apc_head.py#L111 + Version: v0.17.0 + Converted From: + Code: https://github.com/Junjun2016/APCNet +Models: +- Name: apcnet_r50-d8_512x1024_40k_cityscapes + In Collection: APCNet + Metadata: + backbone: R-50-D8 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 280.11 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 7.7 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.02 + mIoU(ms+flip): 79.26 + Config: configs/apcnet/apcnet_r50-d8_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x1024_40k_cityscapes/apcnet_r50-d8_512x1024_40k_cityscapes_20201214_115717-5e88fa33.pth +- Name: apcnet_r101-d8_512x1024_40k_cityscapes + In Collection: APCNet + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 465.12 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 11.2 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.08 + mIoU(ms+flip): 80.34 + Config: configs/apcnet/apcnet_r101-d8_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x1024_40k_cityscapes/apcnet_r101-d8_512x1024_40k_cityscapes_20201214_115716-abc9d111.pth +- Name: apcnet_r50-d8_769x769_40k_cityscapes + In Collection: APCNet + Metadata: + backbone: R-50-D8 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - 
value: 657.89 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 8.7 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.89 + mIoU(ms+flip): 79.75 + Config: configs/apcnet/apcnet_r50-d8_769x769_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_769x769_40k_cityscapes/apcnet_r50-d8_769x769_40k_cityscapes_20201214_115717-2a2628d7.pth +- Name: apcnet_r101-d8_769x769_40k_cityscapes + In Collection: APCNet + Metadata: + backbone: R-101-D8 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - value: 970.87 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 12.7 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.96 + mIoU(ms+flip): 79.24 + Config: configs/apcnet/apcnet_r101-d8_769x769_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_769x769_40k_cityscapes/apcnet_r101-d8_769x769_40k_cityscapes_20201214_115718-b650de90.pth +- Name: apcnet_r50-d8_512x1024_80k_cityscapes + In Collection: APCNet + Metadata: + backbone: R-50-D8 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.96 + mIoU(ms+flip): 79.94 + Config: configs/apcnet/apcnet_r50-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x1024_80k_cityscapes/apcnet_r50-d8_512x1024_80k_cityscapes_20201214_115716-987f51e3.pth +- Name: apcnet_r101-d8_512x1024_80k_cityscapes + In Collection: APCNet + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.64 + mIoU(ms+flip): 80.61 + Config: configs/apcnet/apcnet_r101-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x1024_80k_cityscapes/apcnet_r101-d8_512x1024_80k_cityscapes_20201214_115705-b1ff208a.pth +- Name: apcnet_r50-d8_769x769_80k_cityscapes + In Collection: APCNet + Metadata: + backbone: R-50-D8 + crop size: (769,769) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.79 + mIoU(ms+flip): 80.35 + Config: configs/apcnet/apcnet_r50-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_769x769_80k_cityscapes/apcnet_r50-d8_769x769_80k_cityscapes_20201214_115718-7ea9fa12.pth +- Name: apcnet_r101-d8_769x769_80k_cityscapes + In Collection: APCNet + Metadata: + backbone: R-101-D8 + crop size: (769,769) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.45 + mIoU(ms+flip): 79.91 + Config: configs/apcnet/apcnet_r101-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_769x769_80k_cityscapes/apcnet_r101-d8_769x769_80k_cityscapes_20201214_115716-a7fbc2ab.pth +- Name: apcnet_r50-d8_512x512_80k_ade20k + In Collection: APCNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 50.99 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 10.1 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.2 + mIoU(ms+flip): 43.3 + Config: 
configs/apcnet/apcnet_r50-d8_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x512_80k_ade20k/apcnet_r50-d8_512x512_80k_ade20k_20201214_115705-a8626293.pth +- Name: apcnet_r101-d8_512x512_80k_ade20k + In Collection: APCNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 76.34 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 13.6 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.54 + mIoU(ms+flip): 46.65 + Config: configs/apcnet/apcnet_r101-d8_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x512_80k_ade20k/apcnet_r101-d8_512x512_80k_ade20k_20201214_115704-c656c3fb.pth +- Name: apcnet_r50-d8_512x512_160k_ade20k + In Collection: APCNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.4 + mIoU(ms+flip): 43.94 + Config: configs/apcnet/apcnet_r50-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r50-d8_512x512_160k_ade20k/apcnet_r50-d8_512x512_160k_ade20k_20201214_115706-25fb92c2.pth +- Name: apcnet_r101-d8_512x512_160k_ade20k + In Collection: APCNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.41 + mIoU(ms+flip): 46.63 + Config: configs/apcnet/apcnet_r101-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/apcnet/apcnet_r101-d8_512x512_160k_ade20k/apcnet_r101-d8_512x512_160k_ade20k_20201214_115705-73f9a8d7.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r101-d8_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..1e1cec6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './apcnet_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r101-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..04cb006 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './apcnet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r101-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..1ce2279 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './apcnet_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r101-d8_512x512_80k_ade20k.py 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..8f10b98 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './apcnet_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r101-d8_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000..5c44ebc --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './apcnet_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r101-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..6169845 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './apcnet_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r50-d8_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..99c61a9 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/apcnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r50-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..62a0627 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/apcnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r50-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..f7821c5 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/apcnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r50-d8_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..daafa5f --- /dev/null +++ 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/apcnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r50-d8_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000..3db6140 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/apcnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r50-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..9cac425 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/apcnet/apcnet_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/apcnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/beit/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/beit/README.md new file mode 100644 index 0000000..31e1bd6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/beit/README.md @@ -0,0 +1,85 @@ +# BEiT + +[BEiT: BERT Pre-Training of Image Transformers](https://arxiv.org/abs/2106.08254) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +We introduce a self-supervised vision representation model BEiT, which stands for Bidirectional Encoder representation from Image Transformers. Following BERT developed in the natural language processing area, we propose a masked image modeling task to pretrain vision Transformers. Specifically, each image has two views in our pre-training, i.e., image patches (such as 16x16 pixels), and visual tokens (i.e., discrete tokens). We first "tokenize" the original image into visual tokens. Then we randomly mask some image patches and feed them into the backbone Transformer. The pre-training objective is to recover the original visual tokens based on the corrupted image patches. After pre-training BEiT, we directly fine-tune the model parameters on downstream tasks by appending task layers upon the pretrained encoder. Experimental results on image classification and semantic segmentation show that our model achieves competitive results with previous pre-training methods. For example, base-size BEiT achieves 83.2% top-1 accuracy on ImageNet-1K, significantly outperforming from-scratch DeiT training (81.8%) with the same setup. Moreover, large-size BEiT obtains 86.3% using only ImageNet-1K, even outperforming ViT-L with supervised pre-training on ImageNet-22K (85.2%). The code and pretrained models are available at [this https URL](https://github.com/microsoft/unilm/tree/master/beit). + + +
+ +
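+The masked-image-modeling recipe above can be sketched in a few lines. Everything below is illustrative and assumed rather than the official implementation: the visual tokenizer is replaced by pre-computed token ids (BEiT obtains them from a learned discrete VAE), and the encoder is a toy two-layer Transformer instead of the actual ViT backbone.
+
+```python
+import torch
+import torch.nn as nn
+
+
+class ToyMaskedImageModeling(nn.Module):
+    """BEiT-style pre-training objective (illustrative sketch only)."""
+
+    def __init__(self, num_patches=196, dim=768, vocab_size=8192):
+        super().__init__()
+        self.patch_embed = nn.Linear(16 * 16 * 3, dim)   # flattened 16x16 RGB patches
+        self.mask_token = nn.Parameter(torch.zeros(1, 1, dim))
+        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches, dim))
+        layer = nn.TransformerEncoderLayer(dim, nhead=12, batch_first=True)
+        self.encoder = nn.TransformerEncoder(layer, num_layers=2)
+        self.head = nn.Linear(dim, vocab_size)           # predicts visual tokens
+
+    def forward(self, patches, token_ids, mask):
+        # patches: (B, N, 768) raw pixels; token_ids: (B, N); mask: (B, N) bool
+        x = self.patch_embed(patches)
+        # Corrupt the input: replace masked patches with the mask token.
+        x = torch.where(mask.unsqueeze(-1), self.mask_token.expand_as(x), x)
+        x = self.encoder(x + self.pos_embed)
+        logits = self.head(x)                            # (B, N, vocab_size)
+        # Loss is computed only on the masked (corrupted) positions.
+        return nn.functional.cross_entropy(logits[mask], token_ids[mask])
+
+
+model = ToyMaskedImageModeling()
+patches = torch.randn(2, 196, 768)
+token_ids = torch.randint(0, 8192, (2, 196))             # stand-in tokenizer output
+mask = torch.rand(2, 196) < 0.4                          # ~40% of patches masked
+print(model(patches, token_ids, mask).item())
+```
+
+After pre-training, the tokenizer and prediction head are discarded and a task layer (here, the segmentation decode head in these configs) is appended to the encoder for fine-tuning.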
+ +## Citation + +```bibtex +@inproceedings{beit, + title={{BEiT}: {BERT} Pre-Training of Image Transformers}, + author={Hangbo Bao and Li Dong and Songhao Piao and Furu Wei}, + booktitle={International Conference on Learning Representations}, + year={2022}, + url={https://openreview.net/forum?id=p-BhZSz59o4} +} +``` + +## Usage + +To use other repositories' pre-trained models, it is necessary to convert keys. + +We provide a script [`beit2mmseg.py`](../../tools/model_converters/beit2mmseg.py) in the tools directory to convert the keys of models from [the official repo](https://github.com/microsoft/unilm/tree/master/beit/semantic_segmentation) to MMSegmentation style. + +```shell +python tools/model_converters/beit2mmseg.py ${PRETRAIN_PATH} ${STORE_PATH} +``` + +For example: + +```shell +python tools/model_converters/beit2mmseg.py https://conversationhub.blob.core.windows.net/beit-share-public/beit/beit_base_patch16_224_pt22k_ft22k.pth pretrain/beit_base_patch16_224_pt22k_ft22k.pth +``` + +This script converts the model from `PRETRAIN_PATH` and stores the converted model in `STORE_PATH`. + +In our default setting, the pretrained models are defined as below: + +| pretrained models | original models | +| ----------------- | --------------------------------------------------------------------------------------------------------------------------- | +| BEiT_base.pth | ['BEiT_base'](https://conversationhub.blob.core.windows.net/beit-share-public/beit/beit_base_patch16_224_pt22k_ft22k.pth) | +| BEiT_large.pth | ['BEiT_large'](https://conversationhub.blob.core.windows.net/beit-share-public/beit/beit_large_patch16_224_pt22k_ft22k.pth) | + +Verify the single-scale results of the model: + +```shell +sh tools/dist_test.sh \ +configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py \ +upernet_beit-large_fp16_8x1_640x640_160k_ade20k-8fc0dd5d.pth $GPUS --eval mIoU +``` + +Since the relative position embedding requires the input height and width to be equal, sliding-window inference is adopted for multi-scale testing, with min_size=640 so that the shortest edge is 640. Multi-scale inference therefore uses a separate config instead of '--aug-test'.
For multi-scale inference: + +```shell +sh tools/dist_test.sh \ +configs/beit/upernet_beit-large_fp16_640x640_160k_ade20k_ms.py \ +upernet_beit-large_fp16_8x1_640x640_160k_ade20k-8fc0dd5d.pth $GPUS --eval mIoU +``` + +## Results and models + +### ADE20K + +| Method | Backbone | Crop Size | pretrain | pretrain img size | Batch Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------- | -------- | --------- | ------------ | ----------------- | ---------- | ------- | -------- | -------------- | ----- | ------------: | ---------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UPerNet | BEiT-B | 640x640 | ImageNet-22K | 224x224 | 16 | 160000 | 15.88 | 2.00 | 53.08 | 53.84 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/beit/upernet_beit-base_8x2_640x640_160k_ade20k/upernet_beit-base_8x2_640x640_160k_ade20k-eead221d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/beit/upernet_beit-base_8x2_640x640_160k_ade20k/upernet_beit-base_8x2_640x640_160k_ade20k.log.json) | +| UPerNet | BEiT-L | 640x640 | ImageNet-22K | 224x224 | 8 | 320000 | 22.64 | 0.96 | 56.33 | 56.84 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k/upernet_beit-large_fp16_8x1_640x640_160k_ade20k-8fc0dd5d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.log.json) | diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/beit/beit.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/beit/beit.yml new file mode 100644 index 0000000..602a887 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/beit/beit.yml @@ -0,0 +1,45 @@ +Models: +- Name: upernet_beit-base_8x2_640x640_160k_ade20k + In Collection: UPerNet + Metadata: + backbone: BEiT-B + crop size: (640,640) + lr schd: 160000 + inference time (ms/im): + - value: 500.0 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (640,640) + Training Memory (GB): 15.88 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 53.08 + mIoU(ms+flip): 53.84 + Config: configs/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/beit/upernet_beit-base_8x2_640x640_160k_ade20k/upernet_beit-base_8x2_640x640_160k_ade20k-eead221d.pth +- Name: upernet_beit-large_fp16_8x1_640x640_160k_ade20k + In Collection: UPerNet + Metadata: + backbone: BEiT-L + crop size: (640,640) + lr schd: 320000 + inference time (ms/im): + - value: 1041.67 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP16 + resolution: (640,640) + Training Memory (GB): 22.64 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 56.33 + 
mIoU(ms+flip): 56.84 + Config: configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k/upernet_beit-large_fp16_8x1_640x640_160k_ade20k-8fc0dd5d.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/beit/upernet_beit-base_640x640_160k_ade20k_ms.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/beit/upernet_beit-base_640x640_160k_ade20k_ms.py new file mode 100644 index 0000000..f764c92 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/beit/upernet_beit-base_640x640_160k_ade20k_ms.py @@ -0,0 +1,24 @@ +_base_ = './upernet_beit-base_8x2_640x640_160k_ade20k.py' + +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2560, 640), + img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=True, + transforms=[ + dict(type='Resize', keep_ratio=True, min_size=640), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline), + samples_per_gpu=2) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py new file mode 100644 index 0000000..b36adc3 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/beit/upernet_beit-base_8x2_640x640_160k_ade20k.py @@ -0,0 +1,30 @@ +_base_ = [ + '../_base_/models/upernet_beit.py', '../_base_/datasets/ade20k_640x640.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] + +model = dict( + pretrained='pretrain/beit_base_patch16_224_pt22k_ft22k.pth', + test_cfg=dict(mode='slide', crop_size=(640, 640), stride=(426, 426))) + +optimizer = dict( + _delete_=True, + type='AdamW', + lr=3e-5, + betas=(0.9, 0.999), + weight_decay=0.05, + constructor='LayerDecayOptimizerConstructor', + paramwise_cfg=dict(num_layers=12, layer_decay_rate=0.9)) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/beit/upernet_beit-large_fp16_640x640_160k_ade20k_ms.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/beit/upernet_beit-large_fp16_640x640_160k_ade20k_ms.py new file mode 100644 index 0000000..fd4d947 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/beit/upernet_beit-large_fp16_640x640_160k_ade20k_ms.py @@ -0,0 +1,22 @@ +_base_ = './upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py' + +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2560, 640), + img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=True, + transforms=[ + dict(type='Resize', keep_ratio=True, min_size=640), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] 
+data = dict( + val=dict(pipeline=test_pipeline), test=dict(pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py new file mode 100644 index 0000000..e6247b7 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/beit/upernet_beit-large_fp16_8x1_640x640_160k_ade20k.py @@ -0,0 +1,47 @@ +_base_ = [ + '../_base_/models/upernet_beit.py', '../_base_/datasets/ade20k_640x640.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_320k.py' +] + +model = dict( + pretrained='pretrain/beit_large_patch16_224_pt22k_ft22k.pth', + backbone=dict( + type='BEiT', + embed_dims=1024, + num_layers=24, + num_heads=16, + mlp_ratio=4, + qv_bias=True, + init_values=1e-6, + drop_path_rate=0.2, + out_indices=[7, 11, 15, 23]), + neck=dict(embed_dim=1024, rescales=[4, 2, 1, 0.5]), + decode_head=dict( + in_channels=[1024, 1024, 1024, 1024], num_classes=150, channels=1024), + auxiliary_head=dict(in_channels=1024, num_classes=150), + test_cfg=dict(mode='slide', crop_size=(640, 640), stride=(426, 426))) + +optimizer = dict( + _delete_=True, + type='AdamW', + lr=2e-5, + betas=(0.9, 0.999), + weight_decay=0.05, + constructor='LayerDecayOptimizerConstructor', + paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.95)) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=3000, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +data = dict(samples_per_gpu=1) +optimizer_config = dict( + type='GradientCumulativeFp16OptimizerHook', cumulative_iters=2) + +fp16 = dict() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/README.md new file mode 100644 index 0000000..58092d6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/README.md @@ -0,0 +1,64 @@ +# BiSeNetV1 + +[BiSeNet: Bilateral Segmentation Network for Real-time Semantic Segmentation](https://arxiv.org/abs/1808.00897) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Semantic segmentation requires both rich spatial information and a sizeable receptive field. However, modern approaches usually compromise spatial resolution to achieve real-time inference speed, which leads to poor performance. In this paper, we address this dilemma with a novel Bilateral Segmentation Network (BiSeNet). We first design a Spatial Path with a small stride to preserve the spatial information and generate high-resolution features. Meanwhile, a Context Path with a fast downsampling strategy is employed to obtain a sufficient receptive field. On top of the two paths, we introduce a new Feature Fusion Module to combine features efficiently. The proposed architecture strikes a good balance between speed and segmentation performance on the Cityscapes, CamVid, and COCO-Stuff datasets. Specifically, for a 2048x1024 input, we achieve 68.4% Mean IOU on the Cityscapes test dataset at a speed of 105 FPS on one NVIDIA Titan XP card, which is significantly faster than the existing methods with comparable performance. + + +
+ +
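+As a rough illustration of the two-path design described in the abstract — a shallow, high-resolution Spatial Path, a rapidly downsampled Context Path with global average pooling, and a Feature Fusion Module with channel attention — here is a toy end-to-end sketch. The channel widths, the plain-conv Context Path, and the attention layout are simplifying assumptions; in the configs below the Context Path backbone is a ResNet.
+
+```python
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+def conv_bn_relu(cin, cout, k, s):
+    return nn.Sequential(
+        nn.Conv2d(cin, cout, k, s, k // 2, bias=False),
+        nn.BatchNorm2d(cout), nn.ReLU(inplace=True))
+
+
+class ToyBiSeNet(nn.Module):
+    def __init__(self, num_classes=19):
+        super().__init__()
+        # Spatial Path: small stride (8), wide features -> preserves detail.
+        self.spatial = nn.Sequential(
+            conv_bn_relu(3, 64, 7, 2),
+            conv_bn_relu(64, 64, 3, 2),
+            conv_bn_relu(64, 128, 3, 2))
+        # Context Path: fast downsampling to stride 32 for receptive field
+        # (a real model uses a ResNet backbone here).
+        self.context = nn.Sequential(
+            conv_bn_relu(3, 64, 3, 4),
+            conv_bn_relu(64, 128, 3, 4),
+            conv_bn_relu(128, 128, 3, 2))
+        # Feature Fusion Module: concat + channel attention.
+        self.fuse = conv_bn_relu(256, 256, 1, 1)
+        self.attn = nn.Sequential(
+            nn.AdaptiveAvgPool2d(1), nn.Conv2d(256, 256, 1), nn.Sigmoid())
+        self.head = nn.Conv2d(256, num_classes, 1)
+
+    def forward(self, x):
+        sp = self.spatial(x)                          # stride 8
+        cp = self.context(x)                          # stride 32
+        cp = cp + F.adaptive_avg_pool2d(cp, 1)        # add global context
+        cp = F.interpolate(cp, size=sp.shape[2:], mode='bilinear',
+                           align_corners=False)
+        f = self.fuse(torch.cat([sp, cp], dim=1))
+        f = f + f * self.attn(f)                      # channel re-weighting
+        return F.interpolate(self.head(f), scale_factor=8, mode='bilinear',
+                             align_corners=False)
+
+
+print(ToyBiSeNet()(torch.randn(1, 3, 512, 1024)).shape)  # (1, 19, 512, 1024)
+```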
+ +## Citation + +```bibtex +@inproceedings{yu2018bisenet, + title={Bisenet: Bilateral segmentation network for real-time semantic segmentation}, + author={Yu, Changqian and Wang, Jingbo and Peng, Chao and Gao, Changxin and Yu, Gang and Sang, Nong}, + booktitle={Proceedings of the European conference on computer vision (ECCV)}, + pages={325--341}, + year={2018} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ----------------------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| BiSeNetV1 (No Pretrain) | R-18-D32 | 1024x1024 | 160000 | 5.69 | 31.77 | 74.44 | 77.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes_20210922_172239-c55e78e2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes_20210922_172239.log.json) | +| BiSeNetV1 | R-18-D32 | 1024x1024 | 160000 | 5.69 | 31.77 | 74.37 | 76.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210905_220251-8ba80eff.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210905_220251.log.json) | +| BiSeNetV1 (4x8) | R-18-D32 | 1024x1024 | 160000 | 11.17 | 31.77 | 75.16 | 77.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes_20210905_220322-bb8db75f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes_20210905_220322.log.json) | +| BiSeNetV1 (No Pretrain) | R-50-D32 | 1024x1024 | 160000 | 15.39 | 7.71 | 76.92 | 78.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes_20210923_222639-7b28a2a6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes_20210923_222639.log.json) | +| BiSeNetV1 | R-50-D32 | 1024x1024 | 160000 | 15.39 | 7.71 | 77.68 | 79.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210917_234628-8b304447.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210917_234628.log.json) | + +### COCO-Stuff 164k + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ----------------------- | --------- | --------- | ------: | -------- | -------------- | ----: | ------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| BiSeNetV1 (No Pretrain) | R-18-D32 | 512x512 | 160000 | - | - | 25.45 | 26.15 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211022_054328-046aa2f2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211022_054328.log.json) | +| BiSeNetV1 | R-18-D32 | 512x512 | 160000 | 6.33 | 74.24 | 28.55 | 29.26 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211023_013100-f700dbf7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211023_013100.log.json) | +| BiSeNetV1 (No Pretrain) | R-50-D32 | 512x512 | 160000 | - | - | 29.82 | 30.33 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_040616-d2bb0df4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_040616.log.json) | +| BiSeNetV1 | R-50-D32 | 512x512 | 160000 | 9.28 | 32.60 | 34.88 | 35.37 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_181932-66747911.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_181932.log.json) | +| BiSeNetV1 (No Pretrain) | R-101-D32 | 512x512 | 160000 | - | - | 31.14 | 31.76 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211102_164147-c6b32c3b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211102_164147.log.json) | +| BiSeNetV1 | R-101-D32 | 512x512 | 160000 | 10.36 | 25.25 | 37.38 | 37.99 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv1/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_225220-28c8f092.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_225220.log.json) | + +Note: + +- `4x8`: Using 4 GPUs with 8 samples per GPU in training. +- For BiSeNetV1 on Cityscapes dataset, default setting is 4 GPUs with 4 samples per GPU in training. +- `No Pretrain` means the model is trained from scratch. 
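+
+As an example of the naming scheme in the notes above, the `4x8` variant is obtained purely by overriding the data settings of the default `4x4` config; the relevant override (matching `bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes.py` in this directory) looks like this:
+
+```python
+_base_ = './bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py'
+# 4 GPUs x 8 samples per GPU -> effective batch size 32.
+data = dict(
+    samples_per_gpu=8,
+    workers_per_gpu=4,
+)
+```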
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1.yml new file mode 100644 index 0000000..61f264b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1.yml @@ -0,0 +1,234 @@ +Collections: +- Name: BiSeNetV1 + Metadata: + Training Data: + - Cityscapes + - COCO-Stuff 164k + Paper: + URL: https://arxiv.org/abs/1808.00897 + Title: 'BiSeNet: Bilateral Segmentation Network for Real-time Semantic Segmentation' + README: configs/bisenetv1/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/backbones/bisenetv1.py#L266 + Version: v0.18.0 + Converted From: + Code: https://github.com/ycszen/TorchSeg/tree/master/model/bisenet +Models: +- Name: bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes + In Collection: BiSeNetV1 + Metadata: + backbone: R-18-D32 + crop size: (1024,1024) + lr schd: 160000 + inference time (ms/im): + - value: 31.48 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (1024,1024) + Training Memory (GB): 5.69 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.44 + mIoU(ms+flip): 77.05 + Config: configs/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes_20210922_172239-c55e78e2.pth +- Name: bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes + In Collection: BiSeNetV1 + Metadata: + backbone: R-18-D32 + crop size: (1024,1024) + lr schd: 160000 + inference time (ms/im): + - value: 31.48 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (1024,1024) + Training Memory (GB): 5.69 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.37 + mIoU(ms+flip): 76.91 + Config: configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210905_220251-8ba80eff.pth +- Name: bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes + In Collection: BiSeNetV1 + Metadata: + backbone: R-18-D32 + crop size: (1024,1024) + lr schd: 160000 + inference time (ms/im): + - value: 31.48 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (1024,1024) + Training Memory (GB): 11.17 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.16 + mIoU(ms+flip): 77.24 + Config: configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes_20210905_220322-bb8db75f.pth +- Name: bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes + In Collection: BiSeNetV1 + Metadata: + backbone: R-50-D32 + crop size: (1024,1024) + lr schd: 160000 + inference time (ms/im): + - value: 129.7 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (1024,1024) + Training Memory (GB): 15.39 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.92 + mIoU(ms+flip): 78.87 + Config: 
configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes_20210923_222639-7b28a2a6.pth +- Name: bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes + In Collection: BiSeNetV1 + Metadata: + backbone: R-50-D32 + crop size: (1024,1024) + lr schd: 160000 + inference time (ms/im): + - value: 129.7 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (1024,1024) + Training Memory (GB): 15.39 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.68 + mIoU(ms+flip): 79.57 + Config: configs/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes_20210917_234628-8b304447.pth +- Name: bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k + In Collection: BiSeNetV1 + Metadata: + backbone: R-18-D32 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 25.45 + mIoU(ms+flip): 26.15 + Config: configs/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211022_054328-046aa2f2.pth +- Name: bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k + In Collection: BiSeNetV1 + Metadata: + backbone: R-18-D32 + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 13.47 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 6.33 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 28.55 + mIoU(ms+flip): 29.26 + Config: configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211023_013100-f700dbf7.pth +- Name: bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k + In Collection: BiSeNetV1 + Metadata: + backbone: R-50-D32 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 29.82 + mIoU(ms+flip): 30.33 + Config: configs/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_040616-d2bb0df4.pth +- Name: bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k + In Collection: BiSeNetV1 + Metadata: + backbone: R-50-D32 + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 30.67 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.28 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 34.88 + mIoU(ms+flip): 35.37 + Config: configs/bisenetv1/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_181932-66747911.pth +- Name: bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k + In Collection: BiSeNetV1 + Metadata: + backbone: R-101-D32 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 31.14 + mIoU(ms+flip): 31.76 + Config: configs/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211102_164147-c6b32c3b.pth +- Name: bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k + In Collection: BiSeNetV1 + Metadata: + backbone: R-101-D32 + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 39.6 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 10.36 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 37.38 + mIoU(ms+flip): 37.99 + Config: configs/bisenetv1/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv1/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k_20211101_225220-28c8f092.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py new file mode 100644 index 0000000..c3fe215 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r101-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py @@ -0,0 +1,6 @@ +_base_ = './bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py' +model = dict( + backbone=dict( + backbone_cfg=dict( + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet101_v1c')))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py new file mode 100644 index 0000000..b1e1c3e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r101-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py @@ -0,0 +1,18 @@ +_base_ = [ + '../_base_/models/bisenetv1_r18-d32.py', + '../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +model = dict( + backbone=dict( + context_channels=(512, 1024, 2048), + spatial_channels=(256, 256, 256, 512), + out_channels=1024, + backbone_cfg=dict(type='ResNet', depth=101)), + decode_head=dict(in_channels=1024, channels=1024, num_classes=171), + auxiliary_head=[ + dict(in_channels=512, channels=256, num_classes=171), + dict(in_channels=512, channels=256, num_classes=171), + ]) +lr_config = dict(warmup='linear', warmup_iters=1000) +optimizer = dict(lr=0.005) diff --git 
a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes.py new file mode 100644 index 0000000..f4019e9 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r18-d32_4x4_1024x1024_160k_cityscapes.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/bisenetv1_r18-d32.py', + '../_base_/datasets/cityscapes_1024x1024.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +lr_config = dict(warmup='linear', warmup_iters=1000) +optimizer = dict(lr=0.025) +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, +) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py new file mode 100644 index 0000000..ef061a1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py @@ -0,0 +1,16 @@ +_base_ = [ + '../_base_/models/bisenetv1_r18-d32.py', + '../_base_/datasets/cityscapes_1024x1024.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + backbone=dict( + backbone_cfg=dict( + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet18_v1c')))) +lr_config = dict(warmup='linear', warmup_iters=1000) +optimizer = dict(lr=0.025) +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, +) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes.py new file mode 100644 index 0000000..f4b9f6d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_4x8_1024x1024_160k_cityscapes.py @@ -0,0 +1,5 @@ +_base_ = './bisenetv1_r18-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py' +data = dict( + samples_per_gpu=8, + workers_per_gpu=4, +) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py new file mode 100644 index 0000000..c6d9304 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r18-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py @@ -0,0 +1,6 @@ +_base_ = './bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py' +model = dict( + backbone=dict( + backbone_cfg=dict( + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet18_v1c'))), ) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py new file mode 100644 index 0000000..78d7fea --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r18-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py @@ -0,0 +1,13 @@ +_base_ = [ + '../_base_/models/bisenetv1_r18-d32.py', + 
'../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=171), + auxiliary_head=[ + dict(num_classes=171), + dict(num_classes=171), + ]) +lr_config = dict(warmup='linear', warmup_iters=1000) +optimizer = dict(lr=0.005) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py new file mode 100644 index 0000000..7cadd50 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py @@ -0,0 +1,42 @@ +_base_ = [ + '../_base_/models/bisenetv1_r18-d32.py', + '../_base_/datasets/cityscapes_1024x1024.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='BiSeNetV1', + context_channels=(512, 1024, 2048), + spatial_channels=(256, 256, 256, 512), + out_channels=1024, + backbone_cfg=dict(type='ResNet', depth=50)), + decode_head=dict( + type='FCNHead', in_channels=1024, in_index=0, channels=1024), + auxiliary_head=[ + dict( + type='FCNHead', + in_channels=512, + channels=256, + num_convs=1, + num_classes=19, + in_index=1, + norm_cfg=norm_cfg, + concat_input=False), + dict( + type='FCNHead', + in_channels=512, + channels=256, + num_convs=1, + num_classes=19, + in_index=2, + norm_cfg=norm_cfg, + concat_input=False), + ]) +lr_config = dict(warmup='linear', warmup_iters=1000) +optimizer = dict(lr=0.05) +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, +) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py new file mode 100644 index 0000000..5625a76 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r50-d32_in1k-pre_4x4_1024x1024_160k_cityscapes.py @@ -0,0 +1,7 @@ +_base_ = './bisenetv1_r50-d32_4x4_1024x1024_160k_cityscapes.py' +model = dict( + type='EncoderDecoder', + backbone=dict( + backbone_cfg=dict( + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet50_v1c')))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py new file mode 100644 index 0000000..f0fea69 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r50-d32_in1k-pre_lr5e-3_4x4_512x512_160k_coco-stuff164k.py @@ -0,0 +1,7 @@ +_base_ = './bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py' + +model = dict( + backbone=dict( + backbone_cfg=dict( + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet50_v1c')))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py new file mode 100644 index 0000000..dbbccc6 --- /dev/null +++ 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv1/bisenetv1_r50-d32_lr5e-3_4x4_512x512_160k_coco-stuff164k.py @@ -0,0 +1,18 @@ +_base_ = [ + '../_base_/models/bisenetv1_r18-d32.py', + '../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +model = dict( + backbone=dict( + context_channels=(512, 1024, 2048), + spatial_channels=(256, 256, 256, 512), + out_channels=1024, + backbone_cfg=dict(type='ResNet', depth=50)), + decode_head=dict(in_channels=1024, channels=1024, num_classes=171), + auxiliary_head=[ + dict(in_channels=512, channels=256, num_classes=171), + dict(in_channels=512, channels=256, num_classes=171), + ]) +lr_config = dict(warmup='linear', warmup_iters=1000) +optimizer = dict(lr=0.005) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv2/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv2/README.md new file mode 100644 index 0000000..6b74b7e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv2/README.md @@ -0,0 +1,53 @@ +# BiSeNetV2 + +[Bisenet v2: Bilateral Network with Guided Aggregation for Real-time Semantic Segmentation](https://arxiv.org/abs/2004.02147) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +The low-level details and high-level semantics are both essential to the semantic segmentation task. However, to speed up the model inference, current approaches almost always sacrifice the low-level details, which leads to a considerable accuracy decrease. We propose to treat these spatial details and categorical semantics separately to achieve high accuracy and high efficiency for realtime semantic segmentation. To this end, we propose an efficient and effective architecture with a good trade-off between speed and accuracy, termed Bilateral Segmentation Network (BiSeNet V2). This architecture involves: (i) a Detail Branch, with wide channels and shallow layers to capture low-level details and generate high-resolution feature representation; (ii) a Semantic Branch, with narrow channels and deep layers to obtain high-level semantic context. The Semantic Branch is lightweight due to reducing the channel capacity and a fast-downsampling strategy. Furthermore, we design a Guided Aggregation Layer to enhance mutual connections and fuse both types of feature representation. Besides, a booster training strategy is designed to improve the segmentation performance without any extra inference cost. Extensive quantitative and qualitative evaluations demonstrate that the proposed architecture performs favourably against a few state-of-the-art real-time semantic segmentation approaches. Specifically, for a 2,048x1,024 input, we achieve 72.6% Mean IoU on the Cityscapes test set with a speed of 156 FPS on one NVIDIA GeForce GTX 1080 Ti card, which is significantly faster than existing methods, yet we achieve better segmentation accuracy. + + + +
+ +<!-- [IMAGE]: BiSeNetV2 architecture figure -->
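+To make the two-branch design concrete, here is a minimal PyTorch sketch of the bilateral idea from the abstract: a wide, shallow Detail Branch at 1/8 resolution, a narrow, deep Semantic Branch fast-downsampled to 1/32, and a fused prediction. It is illustrative only — the class and layer names are ours, and the real BiSeNetV2 uses stem/gather-expansion blocks and a full Guided Aggregation Layer rather than this reduced gated sum:
+
+```python
+import torch
+import torch.nn as nn
+
+def conv_bn_relu(c_in, c_out, stride=1):
+    return nn.Sequential(
+        nn.Conv2d(c_in, c_out, 3, stride, 1, bias=False),
+        nn.BatchNorm2d(c_out), nn.ReLU(inplace=True))
+
+class BilateralSketch(nn.Module):
+    def __init__(self, num_classes=19):
+        super().__init__()
+        # Detail Branch: wide channels, shallow, stops at stride 8.
+        self.detail = nn.Sequential(
+            conv_bn_relu(3, 64, 2), conv_bn_relu(64, 64, 2),
+            conv_bn_relu(64, 128, 2))
+        # Semantic Branch: narrow channels, deeper, downsamples to stride 32.
+        self.semantic = nn.Sequential(
+            conv_bn_relu(3, 16, 2), conv_bn_relu(16, 32, 2),
+            conv_bn_relu(32, 64, 2), conv_bn_relu(64, 128, 2),
+            conv_bn_relu(128, 128, 2))
+        self.head = nn.Conv2d(128, num_classes, 1)
+
+    def forward(self, x):
+        d = self.detail(x)                      # (N, 128, H/8, W/8)
+        s = self.semantic(x)                    # (N, 128, H/32, W/32)
+        s = nn.functional.interpolate(
+            s, size=d.shape[2:], mode='bilinear', align_corners=False)
+        fused = d * torch.sigmoid(s) + s        # stand-in for guided aggregation
+        out = self.head(fused)
+        return nn.functional.interpolate(
+            out, scale_factor=8, mode='bilinear', align_corners=False)
+
+print(BilateralSketch()(torch.randn(1, 3, 1024, 1024)).shape)  # (1, 19, 1024, 1024)
+```
+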
+ +## Citation + +```bibtex +@article{yu2021bisenet, + title={Bisenet v2: Bilateral network with guided aggregation for real-time semantic segmentation}, + author={Yu, Changqian and Gao, Changxin and Wang, Jingbo and Yu, Gang and Shen, Chunhua and Sang, Nong}, + journal={International Journal of Computer Vision}, + pages={1--18}, + year={2021}, + publisher={Springer} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ---------------- | --------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| BiSeNetV2 | BiSeNetV2 | 1024x1024 | 160000 | 7.64 | 31.77 | 73.21 | 75.74 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv2/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv2/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes_20210902_015551-bcf10f09.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv2/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes_20210902_015551.log.json) | +| BiSeNetV2 (OHEM) | BiSeNetV2 | 1024x1024 | 160000 | 7.64 | - | 73.57 | 75.80 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv2/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv2/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes_20210902_112947-5f8103b4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv2/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes_20210902_112947.log.json) | +| BiSeNetV2 (4x8) | BiSeNetV2 | 1024x1024 | 160000 | 15.05 | - | 75.76 | 77.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv2/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv2/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes_20210903_000032-e1a2eed6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv2/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes_20210903_000032.log.json) | +| BiSeNetV2 (FP16) | BiSeNetV2 | 1024x1024 | 160000 | 5.77 | 36.65 | 73.07 | 75.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv2/bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv2/bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes/bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes_20210902_045942-b979777b.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv2/bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes/bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes_20210902_045942.log.json) | + +Note: + +- `OHEM` means Online Hard Example Mining (OHEM) is adopted in training. +- `FP16` means Mixed Precision (FP16) is adopted in training. +- `4x8` means 4 GPUs with 8 samples per GPU in training. diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv2/bisenetv2.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv2/bisenetv2.yml new file mode 100644 index 0000000..455fa6c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv2/bisenetv2.yml @@ -0,0 +1,88 @@ +Collections: +- Name: BiSeNetV2 + Metadata: + Training Data: + - Cityscapes + Paper: + URL: https://arxiv.org/abs/2004.02147 + Title: 'Bisenet v2: Bilateral Network with Guided Aggregation for Real-time Semantic + Segmentation' + README: configs/bisenetv2/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/backbones/bisenetv2.py#L545 + Version: v0.18.0 +Models: +- Name: bisenetv2_fcn_4x4_1024x1024_160k_cityscapes + In Collection: BiSeNetV2 + Metadata: + backbone: BiSeNetV2 + crop size: (1024,1024) + lr schd: 160000 + inference time (ms/im): + - value: 31.48 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (1024,1024) + Training Memory (GB): 7.64 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.21 + mIoU(ms+flip): 75.74 + Config: configs/bisenetv2/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv2/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes_20210902_015551-bcf10f09.pth +- Name: bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes + In Collection: BiSeNetV2 + Metadata: + backbone: BiSeNetV2 + crop size: (1024,1024) + lr schd: 160000 + Training Memory (GB): 7.64 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.57 + mIoU(ms+flip): 75.8 + Config: configs/bisenetv2/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv2/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes_20210902_112947-5f8103b4.pth +- Name: bisenetv2_fcn_4x8_1024x1024_160k_cityscapes + In Collection: BiSeNetV2 + Metadata: + backbone: BiSeNetV2 + crop size: (1024,1024) + lr schd: 160000 + Training Memory (GB): 15.05 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.76 + mIoU(ms+flip): 77.79 + Config: configs/bisenetv2/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv2/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes_20210903_000032-e1a2eed6.pth +- Name: bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes + In Collection: BiSeNetV2 + Metadata: + backbone: BiSeNetV2 + crop size: (1024,1024) + lr schd: 160000 + inference time (ms/im): + - value: 27.29 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP16 + resolution: (1024,1024) + Training Memory (GB): 5.77 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.07 + mIoU(ms+flip): 75.13 + Config: configs/bisenetv2/bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/bisenetv2/bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes/bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes_20210902_045942-b979777b.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv2/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv2/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes.py new file mode 100644 index 0000000..1248bd8 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv2/bisenetv2_fcn_4x4_1024x1024_160k_cityscapes.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/bisenetv2.py', + '../_base_/datasets/cityscapes_1024x1024.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +lr_config = dict(warmup='linear', warmup_iters=1000) +optimizer = dict(lr=0.05) +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, +) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv2/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv2/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes.py new file mode 100644 index 0000000..5e93bea --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv2/bisenetv2_fcn_4x8_1024x1024_160k_cityscapes.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/bisenetv2.py', + '../_base_/datasets/cityscapes_1024x1024.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +lr_config = dict(warmup='linear', warmup_iters=1000) +optimizer = dict(lr=0.05) +data = dict( + samples_per_gpu=8, + workers_per_gpu=4, +) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv2/bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv2/bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes.py new file mode 100644 index 0000000..0196214 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv2/bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes.py @@ -0,0 +1,5 @@ +_base_ = './bisenetv2_fcn_4x4_1024x1024_160k_cityscapes.py' +# fp16 settings +optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.) 
+# fp16 placeholder +fp16 = dict() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv2/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv2/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes.py new file mode 100644 index 0000000..f14e528 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/bisenetv2/bisenetv2_fcn_ohem_4x4_1024x1024_160k_cityscapes.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/bisenetv2.py', + '../_base_/datasets/cityscapes_1024x1024.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +sampler = dict(type='OHEMPixelSampler', thresh=0.7, min_kept=10000) +lr_config = dict(warmup='linear', warmup_iters=1000) +optimizer = dict(lr=0.05) +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, +) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/README.md new file mode 100644 index 0000000..48c37a8 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/README.md @@ -0,0 +1,67 @@ +# CCNet + +[CCNet: Criss-Cross Attention for Semantic Segmentation](https://arxiv.org/abs/1811.11721) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Contextual information is vital in visual understanding problems, such as semantic segmentation and object detection. We propose a Criss-Cross Network (CCNet) for obtaining full-image contextual information in a very effective and efficient way. Concretely, for each pixel, a novel criss-cross attention module harvests the contextual information of all the pixels on its criss-cross path. By taking a further recurrent operation, each pixel can finally capture the full-image dependencies. Besides, a category consistent loss is proposed to enforce the criss-cross attention module to produce more discriminative features. Overall, CCNet is with the following merits: 1) GPU memory friendly. Compared with the non-local block, the proposed recurrent criss-cross attention module requires 11x less GPU memory usage. 2) High computational efficiency. The recurrent criss-cross attention significantly reduces FLOPs by about 85% of the non-local block. 3) The state-of-the-art performance. We conduct extensive experiments on semantic segmentation benchmarks including Cityscapes, ADE20K, human parsing benchmark LIP, instance segmentation benchmark COCO, video segmentation benchmark CamVid. In particular, our CCNet achieves the mIoU scores of 81.9%, 45.76% and 55.47% on the Cityscapes test set, the ADE20K validation set and the LIP validation set respectively, which are the new state-of-the-art results. The source codes are available at [this https URL](https://github.com/speedinghzl/CCNet). + + + +
+ +<!-- [IMAGE]: CCNet criss-cross attention figure -->
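+As a rough sketch of the mechanism, the module below lets every pixel attend only to the H + W - 1 positions on its own row and column instead of all H x W positions, which is where the memory and FLOPs savings over the non-local block come from; applying the module twice (the recurrent operation) spreads information across the full image. This is a simplification — the actual CCNet takes a joint softmax over the whole criss-cross path and masks the duplicated center position, whereas this sketch attends to row and column independently:
+
+```python
+import torch
+import torch.nn as nn
+
+class CrissCrossAttentionSketch(nn.Module):
+    """Simplified criss-cross attention: each pixel attends only to the
+    pixels in its own row and column. Applying the module twice
+    (recurrence, R=2 in the paper) lets each pixel see the whole image."""
+
+    def __init__(self, channels, reduction=8):
+        super().__init__()
+        self.query = nn.Conv2d(channels, channels // reduction, 1)
+        self.key = nn.Conv2d(channels, channels // reduction, 1)
+        self.value = nn.Conv2d(channels, channels, 1)
+        self.gamma = nn.Parameter(torch.zeros(1))  # learned residual weight
+
+    def forward(self, x):
+        n, c, h, w = x.shape
+        q, k, v = self.query(x), self.key(x), self.value(x)
+        # Row attention: each of the n*h rows is a sequence of length w.
+        qr = q.permute(0, 2, 3, 1).reshape(n * h, w, -1)
+        kr = k.permute(0, 2, 3, 1).reshape(n * h, w, -1)
+        vr = v.permute(0, 2, 3, 1).reshape(n * h, w, -1)
+        row = torch.softmax(qr @ kr.transpose(1, 2), dim=-1) @ vr
+        row = row.reshape(n, h, w, c).permute(0, 3, 1, 2)
+        # Column attention: each of the n*w columns is a sequence of length h.
+        qc = q.permute(0, 3, 2, 1).reshape(n * w, h, -1)
+        kc = k.permute(0, 3, 2, 1).reshape(n * w, h, -1)
+        vc = v.permute(0, 3, 2, 1).reshape(n * w, h, -1)
+        col = torch.softmax(qc @ kc.transpose(1, 2), dim=-1) @ vc
+        col = col.reshape(n, w, h, c).permute(0, 3, 2, 1)
+        return self.gamma * (row + col) + x
+
+cca = CrissCrossAttentionSketch(64)
+y = cca(cca(torch.randn(2, 64, 32, 32)))  # two passes = recurrence
+```
+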
+ +## Citation + +```bibtex +@article{huang2018ccnet, + title={CCNet: Criss-Cross Attention for Semantic Segmentation}, + author={Huang, Zilong and Wang, Xinggang and Huang, Lichao and Huang, Chang and Wei, Yunchao and Liu, Wenyu}, + booktitle={ICCV}, + year={2019} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| CCNet | R-50-D8 | 512x1024 | 40000 | 6 | 3.32 | 77.76 | 78.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes/ccnet_r50-d8_512x1024_40k_cityscapes_20200616_142517-4123f401.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes/ccnet_r50-d8_512x1024_40k_cityscapes_20200616_142517.log.json) | +| CCNet | R-101-D8 | 512x1024 | 40000 | 9.5 | 2.31 | 76.35 | 78.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r101-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x1024_40k_cityscapes/ccnet_r101-d8_512x1024_40k_cityscapes_20200616_142540-a3b84ba6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x1024_40k_cityscapes/ccnet_r101-d8_512x1024_40k_cityscapes_20200616_142540.log.json) | +| CCNet | R-50-D8 | 769x769 | 40000 | 6.8 | 1.43 | 78.46 | 79.93 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r50-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_769x769_40k_cityscapes/ccnet_r50-d8_769x769_40k_cityscapes_20200616_145125-76d11884.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_769x769_40k_cityscapes/ccnet_r50-d8_769x769_40k_cityscapes_20200616_145125.log.json) | +| CCNet | R-101-D8 | 769x769 | 40000 | 10.7 | 1.01 | 76.94 | 78.62 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r101-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_769x769_40k_cityscapes/ccnet_r101-d8_769x769_40k_cityscapes_20200617_101428-4f57c8d0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_769x769_40k_cityscapes/ccnet_r101-d8_769x769_40k_cityscapes_20200617_101428.log.json) | +| CCNet | R-50-D8 | 512x1024 | 80000 | - | - | 79.03 | 80.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes/ccnet_r50-d8_512x1024_80k_cityscapes_20200617_010421-869a3423.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes/ccnet_r50-d8_512x1024_80k_cityscapes_20200617_010421.log.json) | +| CCNet | R-101-D8 | 512x1024 | 80000 | - | - | 78.87 | 79.90 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x1024_80k_cityscapes/ccnet_r101-d8_512x1024_80k_cityscapes_20200617_203935-ffae8917.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x1024_80k_cityscapes/ccnet_r101-d8_512x1024_80k_cityscapes_20200617_203935.log.json) | +| CCNet | R-50-D8 | 769x769 | 80000 | - | - | 79.29 | 81.08 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r50-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_769x769_80k_cityscapes/ccnet_r50-d8_769x769_80k_cityscapes_20200617_010421-73eed8ca.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_769x769_80k_cityscapes/ccnet_r50-d8_769x769_80k_cityscapes_20200617_010421.log.json) | +| CCNet | R-101-D8 | 769x769 | 80000 | - | - | 79.45 | 80.66 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r101-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_769x769_80k_cityscapes/ccnet_r101-d8_769x769_80k_cityscapes_20200618_011502-ad3cd481.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_769x769_80k_cityscapes/ccnet_r101-d8_769x769_80k_cityscapes_20200618_011502.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| CCNet | R-50-D8 | 512x512 | 80000 | 8.8 | 20.89 | 41.78 | 42.98 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r50-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_80k_ade20k/ccnet_r50-d8_512x512_80k_ade20k_20200615_014848-aa37f61e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_80k_ade20k/ccnet_r50-d8_512x512_80k_ade20k_20200615_014848.log.json) | +| CCNet | R-101-D8 | 512x512 | 80000 | 12.2 | 14.11 | 43.97 | 45.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r101-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_80k_ade20k/ccnet_r101-d8_512x512_80k_ade20k_20200615_014848-1f4929a3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_80k_ade20k/ccnet_r101-d8_512x512_80k_ade20k_20200615_014848.log.json) | +| CCNet | R-50-D8 | 512x512 | 160000 | - | - | 42.08 | 43.13 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r50-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_160k_ade20k/ccnet_r50-d8_512x512_160k_ade20k_20200616_084435-7c97193b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_160k_ade20k/ccnet_r50-d8_512x512_160k_ade20k_20200616_084435.log.json) | +| CCNet | R-101-D8 | 512x512 | 160000 | - | - | 43.71 | 45.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_160k_ade20k/ccnet_r101-d8_512x512_160k_ade20k_20200616_000644-e849e007.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_160k_ade20k/ccnet_r101-d8_512x512_160k_ade20k_20200616_000644.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| CCNet | R-50-D8 | 512x512 | 20000 | 6 | 20.45 | 76.17 | 77.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r50-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_20k_voc12aug/ccnet_r50-d8_512x512_20k_voc12aug_20200617_193212-fad81784.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_20k_voc12aug/ccnet_r50-d8_512x512_20k_voc12aug_20200617_193212.log.json) | +| CCNet | R-101-D8 | 512x512 | 20000 | 9.5 | 13.64 | 77.27 | 79.02 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r101-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_20k_voc12aug/ccnet_r101-d8_512x512_20k_voc12aug_20200617_193212-0007b61d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_20k_voc12aug/ccnet_r101-d8_512x512_20k_voc12aug_20200617_193212.log.json) | +| CCNet | R-50-D8 | 512x512 | 40000 | - | - | 75.96 | 77.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r50-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_40k_voc12aug/ccnet_r50-d8_512x512_40k_voc12aug_20200613_232127-c2a15f02.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_40k_voc12aug/ccnet_r50-d8_512x512_40k_voc12aug_20200613_232127.log.json) | +| CCNet | R-101-D8 | 512x512 | 40000 | - | - | 77.87 | 78.90 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet/ccnet_r101-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_40k_voc12aug/ccnet_r101-d8_512x512_40k_voc12aug_20200613_232127-c30da577.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_40k_voc12aug/ccnet_r101-d8_512x512_40k_voc12aug_20200613_232127.log.json) | diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet.yml new file mode 100644 index 0000000..b264f2e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet.yml @@ -0,0 +1,305 @@ +Collections: +- Name: CCNet + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + Paper: + URL: https://arxiv.org/abs/1811.11721 + Title: 'CCNet: Criss-Cross Attention for Semantic Segmentation' + README: configs/ccnet/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/apc_head.py#L111 + Version: v0.17.0 + Converted From: + Code: https://github.com/speedinghzl/CCNet +Models: +- Name: ccnet_r50-d8_512x1024_40k_cityscapes + In Collection: CCNet + Metadata: + backbone: R-50-D8 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 301.2 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 6.0 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.76 + mIoU(ms+flip): 78.87 + Config: configs/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes/ccnet_r50-d8_512x1024_40k_cityscapes_20200616_142517-4123f401.pth +- Name: ccnet_r101-d8_512x1024_40k_cityscapes + In Collection: CCNet + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 432.9 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 9.5 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.35 + mIoU(ms+flip): 78.19 + Config: configs/ccnet/ccnet_r101-d8_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x1024_40k_cityscapes/ccnet_r101-d8_512x1024_40k_cityscapes_20200616_142540-a3b84ba6.pth +- Name: ccnet_r50-d8_769x769_40k_cityscapes + In Collection: CCNet + Metadata: + backbone: R-50-D8 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - value: 699.3 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 6.8 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.46 + mIoU(ms+flip): 79.93 + Config: configs/ccnet/ccnet_r50-d8_769x769_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_769x769_40k_cityscapes/ccnet_r50-d8_769x769_40k_cityscapes_20200616_145125-76d11884.pth +- Name: ccnet_r101-d8_769x769_40k_cityscapes + In Collection: CCNet + Metadata: + backbone: R-101-D8 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - value: 990.1 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 10.7 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.94 + mIoU(ms+flip): 78.62 + Config: configs/ccnet/ccnet_r101-d8_769x769_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_769x769_40k_cityscapes/ccnet_r101-d8_769x769_40k_cityscapes_20200617_101428-4f57c8d0.pth +- Name: 
ccnet_r50-d8_512x1024_80k_cityscapes + In Collection: CCNet + Metadata: + backbone: R-50-D8 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.03 + mIoU(ms+flip): 80.16 + Config: configs/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes/ccnet_r50-d8_512x1024_80k_cityscapes_20200617_010421-869a3423.pth +- Name: ccnet_r101-d8_512x1024_80k_cityscapes + In Collection: CCNet + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.87 + mIoU(ms+flip): 79.9 + Config: configs/ccnet/ccnet_r101-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x1024_80k_cityscapes/ccnet_r101-d8_512x1024_80k_cityscapes_20200617_203935-ffae8917.pth +- Name: ccnet_r50-d8_769x769_80k_cityscapes + In Collection: CCNet + Metadata: + backbone: R-50-D8 + crop size: (769,769) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.29 + mIoU(ms+flip): 81.08 + Config: configs/ccnet/ccnet_r50-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_769x769_80k_cityscapes/ccnet_r50-d8_769x769_80k_cityscapes_20200617_010421-73eed8ca.pth +- Name: ccnet_r101-d8_769x769_80k_cityscapes + In Collection: CCNet + Metadata: + backbone: R-101-D8 + crop size: (769,769) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.45 + mIoU(ms+flip): 80.66 + Config: configs/ccnet/ccnet_r101-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_769x769_80k_cityscapes/ccnet_r101-d8_769x769_80k_cityscapes_20200618_011502-ad3cd481.pth +- Name: ccnet_r50-d8_512x512_80k_ade20k + In Collection: CCNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 47.87 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 8.8 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.78 + mIoU(ms+flip): 42.98 + Config: configs/ccnet/ccnet_r50-d8_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_80k_ade20k/ccnet_r50-d8_512x512_80k_ade20k_20200615_014848-aa37f61e.pth +- Name: ccnet_r101-d8_512x512_80k_ade20k + In Collection: CCNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 70.87 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 12.2 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.97 + mIoU(ms+flip): 45.13 + Config: configs/ccnet/ccnet_r101-d8_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_80k_ade20k/ccnet_r101-d8_512x512_80k_ade20k_20200615_014848-1f4929a3.pth +- Name: ccnet_r50-d8_512x512_160k_ade20k + In Collection: CCNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.08 + mIoU(ms+flip): 43.13 + Config: configs/ccnet/ccnet_r50-d8_512x512_160k_ade20k.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_160k_ade20k/ccnet_r50-d8_512x512_160k_ade20k_20200616_084435-7c97193b.pth +- Name: ccnet_r101-d8_512x512_160k_ade20k + In Collection: CCNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.71 + mIoU(ms+flip): 45.04 + Config: configs/ccnet/ccnet_r101-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_160k_ade20k/ccnet_r101-d8_512x512_160k_ade20k_20200616_000644-e849e007.pth +- Name: ccnet_r50-d8_512x512_20k_voc12aug + In Collection: CCNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 48.9 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 6.0 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.17 + mIoU(ms+flip): 77.51 + Config: configs/ccnet/ccnet_r50-d8_512x512_20k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_20k_voc12aug/ccnet_r50-d8_512x512_20k_voc12aug_20200617_193212-fad81784.pth +- Name: ccnet_r101-d8_512x512_20k_voc12aug + In Collection: CCNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 73.31 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.5 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.27 + mIoU(ms+flip): 79.02 + Config: configs/ccnet/ccnet_r101-d8_512x512_20k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_20k_voc12aug/ccnet_r101-d8_512x512_20k_voc12aug_20200617_193212-0007b61d.pth +- Name: ccnet_r50-d8_512x512_40k_voc12aug + In Collection: CCNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 75.96 + mIoU(ms+flip): 77.04 + Config: configs/ccnet/ccnet_r50-d8_512x512_40k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r50-d8_512x512_40k_voc12aug/ccnet_r50-d8_512x512_40k_voc12aug_20200613_232127-c2a15f02.pth +- Name: ccnet_r101-d8_512x512_40k_voc12aug + In Collection: CCNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.87 + mIoU(ms+flip): 78.9 + Config: configs/ccnet/ccnet_r101-d8_512x512_40k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ccnet/ccnet_r101-d8_512x512_40k_voc12aug/ccnet_r101-d8_512x512_40k_voc12aug_20200613_232127-c30da577.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r101-d8_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..d2bac38 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git 
a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r101-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..989928a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r101-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..c32bf48 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r101-d8_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000..53eb77c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r101-d8_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000..d7eb668 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r101-d8_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..029c1d5 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r101-d8_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000..43f05fa --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './ccnet_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r101-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..654f377 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = 
'./ccnet_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..6a4316d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/ccnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..16e3435 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/ccnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r50-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..1ad94d8 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/ccnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r50-d8_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000..bbcd29c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/ccnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r50-d8_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000..947b8ac --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/ccnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r50-d8_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..1a1f49c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ 
+_base_ = [ + '../_base_/models/ccnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r50-d8_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000..580d59c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/ccnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r50-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..c6dac64 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ccnet/ccnet_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/ccnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/cgnet/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/cgnet/README.md new file mode 100644 index 0000000..b0fced4 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/cgnet/README.md @@ -0,0 +1,46 @@ +# CGNet + +[CGNet: A Light-weight Context Guided Network for Semantic Segmentation](https://arxiv.org/abs/1811.08201) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +The demand of applying semantic segmentation model on mobile devices has been increasing rapidly. Current state-of-the-art networks have enormous amount of parameters hence unsuitable for mobile devices, while other small memory footprint models follow the spirit of classification network and ignore the inherent characteristic of semantic segmentation. To tackle this problem, we propose a novel Context Guided Network (CGNet), which is a light-weight and efficient network for semantic segmentation. We first propose the Context Guided (CG) block, which learns the joint feature of both local feature and surrounding context, and further improves the joint feature with the global context. Based on the CG block, we develop CGNet which captures contextual information in all stages of the network and is specially tailored for increasing segmentation accuracy. CGNet is also elaborately designed to reduce the number of parameters and save memory footprint. Under an equivalent number of parameters, the proposed CGNet significantly outperforms existing segmentation networks. Extensive experiments on Cityscapes and CamVid datasets verify the effectiveness of the proposed approach. 
Specifically, without any post-processing and multi-scale testing, the proposed CGNet achieves 64.8% mean IoU on Cityscapes with less than 0.5 M parameters. The source code for the complete system can be found at [this https URL](https://github.com/wutianyiRosun/CGNet). + + + +
+ +<!-- [IMAGE]: CGNet architecture figure -->
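+The core of the network is the Context Guided block, sketched below: a local-feature convolution, a dilated convolution for surrounding context, batch-norm + PReLU fusion of the two into a joint feature, and a squeeze-and-excitation-style global-context gate that refines it. This is a simplified reading of the abstract, not the mmsegmentation implementation (which also has downsampling variants and input injection); all names here are illustrative:
+
+```python
+import torch
+import torch.nn as nn
+
+class CGBlockSketch(nn.Module):
+    """Simplified Context Guided block: local feature + surrounding
+    (dilated) context -> joint feature, gated by global context."""
+
+    def __init__(self, channels, dilation=2, reduction=16):
+        super().__init__()
+        half = channels // 2
+        self.f_loc = nn.Conv2d(channels, half, 3, padding=1,
+                               groups=half, bias=False)        # local feature
+        self.f_sur = nn.Conv2d(channels, half, 3, padding=dilation,
+                               dilation=dilation, groups=half,
+                               bias=False)                      # surrounding context
+        self.joint = nn.Sequential(nn.BatchNorm2d(channels), nn.PReLU(channels))
+        self.f_glo = nn.Sequential(                             # global-context gate
+            nn.AdaptiveAvgPool2d(1),
+            nn.Conv2d(channels, channels // reduction, 1), nn.ReLU(inplace=True),
+            nn.Conv2d(channels // reduction, channels, 1), nn.Sigmoid())
+
+    def forward(self, x):
+        joint = self.joint(torch.cat([self.f_loc(x), self.f_sur(x)], dim=1))
+        return joint * self.f_glo(joint) + x  # global context gates the joint feature
+
+print(CGBlockSketch(32)(torch.randn(1, 32, 64, 64)).shape)  # (1, 32, 64, 64)
+```
+
+In the mmsegmentation 0.x series vendored here, the configs below pair with the `Weights:` checkpoints listed in the tables via `mmseg.apis.init_segmentor(config_file, checkpoint_file)` followed by `inference_segmentor`.
+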
+ +## Citation + +```bibtex +@article{wu2020cgnet, + title={Cgnet: A light-weight context guided network for semantic segmentation}, + author={Wu, Tianyi and Tang, Sheng and Zhang, Rui and Cao, Juan and Zhang, Yongdong}, + journal={IEEE Transactions on Image Processing}, + volume={30}, + pages={1169--1179}, + year={2020}, + publisher={IEEE} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------- | ---------------------------------------- | +| CGNet | M3N21 | 680x680 | 60000 | 7.5 | 30.51 | 65.63 | 68.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/cgnet/cgnet_680x680_60k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/cgnet/cgnet_680x680_60k_cityscapes/cgnet_680x680_60k_cityscapes_20201101_110253-4c0b2f2d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/cgnet/cgnet_680x680_60k_cityscapes/cgnet_680x680_60k_cityscapes-20201101_110253.log.json) | +| CGNet | M3N21 | 512x1024 | 60000 | 8.3 | 31.14 | 68.27 | 70.33 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/cgnet/cgnet_512x1024_60k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/cgnet/cgnet_512x1024_60k_cityscapes/cgnet_512x1024_60k_cityscapes_20201101_110254-124ea03b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/cgnet/cgnet_512x1024_60k_cityscapes/cgnet_512x1024_60k_cityscapes-20201101_110254.log.json) | diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/cgnet/cgnet.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/cgnet/cgnet.yml new file mode 100644 index 0000000..bcd6d89 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/cgnet/cgnet.yml @@ -0,0 +1,59 @@ +Collections: +- Name: CGNet + Metadata: + Training Data: + - Cityscapes + Paper: + URL: https://arxiv.org/abs/1811.08201 + Title: 'CGNet: A Light-weight Context Guided Network for Semantic Segmentation' + README: configs/cgnet/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/cgnet.py#L187 + Version: v0.17.0 + Converted From: + Code: https://github.com/wutianyiRosun/CGNet +Models: +- Name: cgnet_680x680_60k_cityscapes + In Collection: CGNet + Metadata: + backbone: M3N21 + crop size: (680,680) + lr schd: 60000 + inference time (ms/im): + - value: 32.78 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (680,680) + Training Memory (GB): 7.5 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 65.63 + mIoU(ms+flip): 68.04 + Config: configs/cgnet/cgnet_680x680_60k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/cgnet/cgnet_680x680_60k_cityscapes/cgnet_680x680_60k_cityscapes_20201101_110253-4c0b2f2d.pth +- Name: cgnet_512x1024_60k_cityscapes + In Collection: CGNet + Metadata: + backbone: M3N21 + crop size: (512,1024) + lr schd: 60000 + inference time (ms/im): + - value: 
32.11 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 8.3 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 68.27 + mIoU(ms+flip): 70.33 + Config: configs/cgnet/cgnet_512x1024_60k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/cgnet/cgnet_512x1024_60k_cityscapes/cgnet_512x1024_60k_cityscapes_20201101_110254-124ea03b.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/cgnet/cgnet_512x1024_60k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/cgnet/cgnet_512x1024_60k_cityscapes.py new file mode 100644 index 0000000..6efc4cd --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/cgnet/cgnet_512x1024_60k_cityscapes.py @@ -0,0 +1,66 @@ +_base_ = ['../_base_/models/cgnet.py', '../_base_/default_runtime.py'] + +# optimizer +optimizer = dict(type='Adam', lr=0.001, eps=1e-08, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime settings +total_iters = 60000 +checkpoint_config = dict(by_epoch=False, interval=4000) +evaluation = dict(interval=4000, metric='mIoU') + +# dataset settings +dataset_type = 'CityscapesDataset' +data_root = 'data/cityscapes/' +img_norm_cfg = dict( + mean=[72.39239876, 82.90891754, 73.15835921], std=[1, 1, 1], to_rgb=True) +crop_size = (512, 1024) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=8, + workers_per_gpu=4, + train=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/train', + ann_dir='gtFine/train', + pipeline=train_pipeline), + val=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline), + test=dict( + type=dataset_type, + data_root=data_root, + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/cgnet/cgnet_680x680_60k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/cgnet/cgnet_680x680_60k_cityscapes.py new file mode 100644 index 0000000..7d4ebe1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/cgnet/cgnet_680x680_60k_cityscapes.py @@ -0,0 +1,50 @@ +_base_ = [ + '../_base_/models/cgnet.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py' +] + +# optimizer +optimizer = dict(type='Adam', lr=0.001, eps=1e-08, weight_decay=0.0005) +optimizer_config = dict() +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-4, by_epoch=False) +# runtime 
settings +total_iters = 60000 +checkpoint_config = dict(by_epoch=False, interval=4000) +evaluation = dict(interval=4000, metric='mIoU') + +img_norm_cfg = dict( + mean=[72.39239876, 82.90891754, 73.15835921], std=[1, 1, 1], to_rgb=True) +crop_size = (680, 680) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + samples_per_gpu=8, + workers_per_gpu=4, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/convnext/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/convnext/README.md new file mode 100644 index 0000000..09eb702 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/convnext/README.md @@ -0,0 +1,72 @@ +# ConvNeXt + +[A ConvNet for the 2020s](https://arxiv.org/abs/2201.03545) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +The "Roaring 20s" of visual recognition began with the introduction of Vision Transformers (ViTs), which quickly superseded ConvNets as the state-of-the-art image classification model. A vanilla ViT, on the other hand, faces difficulties when applied to general computer vision tasks such as object detection and semantic segmentation. It is the hierarchical Transformers (e.g., Swin Transformers) that reintroduced several ConvNet priors, making Transformers practically viable as a generic vision backbone and demonstrating remarkable performance on a wide variety of vision tasks. However, the effectiveness of such hybrid approaches is still largely credited to the intrinsic superiority of Transformers, rather than the inherent inductive biases of convolutions. In this work, we reexamine the design spaces and test the limits of what a pure ConvNet can achieve. We gradually "modernize" a standard ResNet toward the design of a vision Transformer, and discover several key components that contribute to the performance difference along the way. The outcome of this exploration is a family of pure ConvNet models dubbed ConvNeXt. Constructed entirely from standard ConvNet modules, ConvNeXts compete favorably with Transformers in terms of accuracy and scalability, achieving 87.8% ImageNet top-1 accuracy and outperforming Swin Transformers on COCO detection and ADE20K segmentation, while maintaining the simplicity and efficiency of standard ConvNets. + + + +
+ +
+ +## Citation + +```bibtex +@inproceedings{liu2022convnet, + title={A ConvNet for the 2020s}, + author={Liu, Zhuang and Mao, Hanzi and Wu, Chao-Yuan and Feichtenhofer, Christoph and Darrell, Trevor and Xie, Saining}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)}, + year={2022} +} +``` + +### Usage + +- The ConvNeXt backbone requires [MMClassification](https://github.com/open-mmlab/mmclassification) to be installed first, which provides abundant backbones for downstream tasks. + +```shell +pip install "mmcls>=0.20.1" +``` + +### Pre-trained Models + +The models pre-trained on ImageNet-1k or ImageNet-21k are used for fine-tuning on the downstream tasks. + +| Model | Training Data | Params(M) | Flops(G) | Download | +| :-----------: | :-----------: | :-------: | :------: | :------: | +| ConvNeXt-T\* | ImageNet-1k | 28.59 | 4.46 | [model](https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-tiny_3rdparty_32xb128-noema_in1k_20220301-795e9634.pth) | +| ConvNeXt-S\* | ImageNet-1k | 50.22 | 8.69 | [model](https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-small_3rdparty_32xb128-noema_in1k_20220301-303e75e3.pth) | +| ConvNeXt-B\* | ImageNet-1k | 88.59 | 15.36 | [model](https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-base_3rdparty_32xb128-noema_in1k_20220301-2a0ee547.pth) | +| ConvNeXt-B\* | ImageNet-21k | 88.59 | 15.36 | [model](https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-base_3rdparty_in21k_20220301-262fd037.pth) | +| ConvNeXt-L\* | ImageNet-21k | 197.77 | 34.37 | [model](https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-large_3rdparty_in21k_20220301-e6e0ea0a.pth) | +| ConvNeXt-XL\* | ImageNet-21k | 350.20 | 60.93 | [model](https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-xlarge_3rdparty_in21k_20220301-08aa5ddc.pth) | + +*Models with \* are converted from the [official repo](https://github.com/facebookresearch/ConvNeXt/tree/main/semantic_segmentation#results-and-fine-tuned-models).* + +## Results and models + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------- | ----------- | --------- | ------- | -------- | -------------- | ----- | ------------- | ------ | -------- | +| UPerNet | ConvNeXt-T | 512x512 | 160000 | 4.23 | 19.90 | 46.11 | 46.62 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k/upernet_convnext_tiny_fp16_512x512_160k_ade20k_20220227_124553-cad485de.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k/upernet_convnext_tiny_fp16_512x512_160k_ade20k_20220227_124553.log.json) | +| UPerNet | ConvNeXt-S | 512x512 | 160000 | 5.16 | 15.18 | 48.56 | 49.02 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k/upernet_convnext_small_fp16_512x512_160k_ade20k_20220227_131208-1b1e394f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k/upernet_convnext_small_fp16_512x512_160k_ade20k_20220227_131208.log.json) | +| UPerNet | ConvNeXt-B | 512x512 | 160000 | 6.33 | 14.41 | 48.71 | 49.54 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k/upernet_convnext_base_fp16_512x512_160k_ade20k_20220227_181227-02a24fc6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k/upernet_convnext_base_fp16_512x512_160k_ade20k_20220227_181227.log.json) | +| UPerNet | ConvNeXt-B | 640x640 | 160000 | 8.53 | 10.88 | 52.13 | 52.66 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k/upernet_convnext_base_fp16_640x640_160k_ade20k_20220227_182859-9280e39b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k/upernet_convnext_base_fp16_640x640_160k_ade20k_20220227_182859.log.json) | +| UPerNet | ConvNeXt-L | 640x640 | 160000 | 12.08 | 7.69 | 53.16 | 53.38 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k/upernet_convnext_large_fp16_640x640_160k_ade20k_20220226_040532-e57aa54d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k/upernet_convnext_large_fp16_640x640_160k_ade20k_20220226_040532.log.json) | +| UPerNet | ConvNeXt-XL | 640x640 | 160000 | 26.16\* | 6.33 | 53.58 | 54.11 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k/upernet_convnext_xlarge_fp16_640x640_160k_ade20k_20220226_080344-95fc38c2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k/upernet_convnext_xlarge_fp16_640x640_160k_ade20k_20220226_080344.log.json) | + +Note: + +- `Mem (GB)` with * is collected when `cudnn_benchmark=True`, and hardware is V100. 
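+
+As a usage illustration (an addition to this README, not from the original): a minimal inference sketch, assuming mmsegmentation v0.x with `mmcls` installed, the ConvNeXt-T checkpoint from the table above downloaded locally, and a hypothetical test image `demo.png`:
+
+```python
+from mmseg.apis import init_segmentor, inference_segmentor
+
+# Config shipped in this folder; checkpoint filename taken from the results table above.
+config_file = 'configs/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k.py'
+checkpoint_file = 'upernet_convnext_tiny_fp16_512x512_160k_ade20k_20220227_124553-cad485de.pth'
+
+model = init_segmentor(config_file, checkpoint_file, device='cuda:0')
+result = inference_segmentor(model, 'demo.png')  # list with one H x W array of ADE20K class indices
+```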
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/convnext/convnext.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/convnext/convnext.yml new file mode 100644 index 0000000..2b943aa --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/convnext/convnext.yml @@ -0,0 +1,133 @@ +Models: +- Name: upernet_convnext_tiny_fp16_512x512_160k_ade20k + In Collection: UPerNet + Metadata: + backbone: ConvNeXt-T + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 50.25 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP16 + resolution: (512,512) + Training Memory (GB): 4.23 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 46.11 + mIoU(ms+flip): 46.62 + Config: configs/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k/upernet_convnext_tiny_fp16_512x512_160k_ade20k_20220227_124553-cad485de.pth +- Name: upernet_convnext_small_fp16_512x512_160k_ade20k + In Collection: UPerNet + Metadata: + backbone: ConvNeXt-S + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 65.88 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP16 + resolution: (512,512) + Training Memory (GB): 5.16 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 48.56 + mIoU(ms+flip): 49.02 + Config: configs/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k/upernet_convnext_small_fp16_512x512_160k_ade20k_20220227_131208-1b1e394f.pth +- Name: upernet_convnext_base_fp16_512x512_160k_ade20k + In Collection: UPerNet + Metadata: + backbone: ConvNeXt-B + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 69.4 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP16 + resolution: (512,512) + Training Memory (GB): 6.33 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 48.71 + mIoU(ms+flip): 49.54 + Config: configs/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k/upernet_convnext_base_fp16_512x512_160k_ade20k_20220227_181227-02a24fc6.pth +- Name: upernet_convnext_base_fp16_640x640_160k_ade20k + In Collection: UPerNet + Metadata: + backbone: ConvNeXt-B + crop size: (640,640) + lr schd: 160000 + inference time (ms/im): + - value: 91.91 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP16 + resolution: (640,640) + Training Memory (GB): 8.53 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 52.13 + mIoU(ms+flip): 52.66 + Config: configs/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k/upernet_convnext_base_fp16_640x640_160k_ade20k_20220227_182859-9280e39b.pth +- Name: upernet_convnext_large_fp16_640x640_160k_ade20k + In Collection: UPerNet + Metadata: + backbone: ConvNeXt-L + crop size: (640,640) + lr schd: 160000 + inference time (ms/im): + - value: 130.04 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP16 + resolution: (640,640) + Training Memory (GB): 12.08 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 53.16 + 
mIoU(ms+flip): 53.38 + Config: configs/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k/upernet_convnext_large_fp16_640x640_160k_ade20k_20220226_040532-e57aa54d.pth +- Name: upernet_convnext_xlarge_fp16_640x640_160k_ade20k + In Collection: UPerNet + Metadata: + backbone: ConvNeXt-XL + crop size: (640,640) + lr schd: 160000 + inference time (ms/im): + - value: 157.98 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP16 + resolution: (640,640) + Training Memory (GB): 26.16 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 53.58 + mIoU(ms+flip): 54.11 + Config: configs/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k/upernet_convnext_xlarge_fp16_640x640_160k_ade20k_20220226_080344-95fc38c2.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k.py new file mode 100644 index 0000000..7bf35b2 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/convnext/upernet_convnext_base_fp16_512x512_160k_ade20k.py @@ -0,0 +1,40 @@ +_base_ = [ + '../_base_/models/upernet_convnext.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +model = dict( + decode_head=dict(in_channels=[128, 256, 512, 1024], num_classes=150), + auxiliary_head=dict(in_channels=512, num_classes=150), + test_cfg=dict(mode='slide', crop_size=crop_size, stride=(341, 341)), +) + +optimizer = dict( + constructor='LearningRateDecayOptimizerConstructor', + _delete_=True, + type='AdamW', + lr=0.0001, + betas=(0.9, 0.999), + weight_decay=0.05, + paramwise_cfg={ + 'decay_rate': 0.9, + 'decay_type': 'stage_wise', + 'num_layers': 12 + }) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) +# fp16 settings +optimizer_config = dict(type='Fp16OptimizerHook', loss_scale='dynamic') +# fp16 placeholder +fp16 = dict() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k.py new file mode 100644 index 0000000..8d2c0c2 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/convnext/upernet_convnext_base_fp16_640x640_160k_ade20k.py @@ -0,0 +1,55 @@ +_base_ = [ + '../_base_/models/upernet_convnext.py', + '../_base_/datasets/ade20k_640x640.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +crop_size = (640, 640) +checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-base_3rdparty_in21k_20220301-262fd037.pth' # noqa +model = dict( + backbone=dict( + type='mmcls.ConvNeXt', + arch='base', + out_indices=[0, 1, 2, 3], + drop_path_rate=0.4, + layer_scale_init_value=1.0, + gap_before_final_norm=False, + init_cfg=dict( + type='Pretrained', checkpoint=checkpoint_file, + prefix='backbone.')), + 
decode_head=dict( + in_channels=[128, 256, 512, 1024], + num_classes=150, + ), + auxiliary_head=dict(in_channels=512, num_classes=150), + test_cfg=dict(mode='slide', crop_size=crop_size, stride=(426, 426)), +) + +optimizer = dict( + constructor='LearningRateDecayOptimizerConstructor', + _delete_=True, + type='AdamW', + lr=0.0001, + betas=(0.9, 0.999), + weight_decay=0.05, + paramwise_cfg={ + 'decay_rate': 0.9, + 'decay_type': 'stage_wise', + 'num_layers': 12 + }) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) +# fp16 settings +optimizer_config = dict(type='Fp16OptimizerHook', loss_scale='dynamic') +# fp16 placeholder +fp16 = dict() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k.py new file mode 100644 index 0000000..7527ed5 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/convnext/upernet_convnext_large_fp16_640x640_160k_ade20k.py @@ -0,0 +1,55 @@ +_base_ = [ + '../_base_/models/upernet_convnext.py', + '../_base_/datasets/ade20k_640x640.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +crop_size = (640, 640) +checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-large_3rdparty_in21k_20220301-e6e0ea0a.pth' # noqa +model = dict( + backbone=dict( + type='mmcls.ConvNeXt', + arch='large', + out_indices=[0, 1, 2, 3], + drop_path_rate=0.4, + layer_scale_init_value=1.0, + gap_before_final_norm=False, + init_cfg=dict( + type='Pretrained', checkpoint=checkpoint_file, + prefix='backbone.')), + decode_head=dict( + in_channels=[192, 384, 768, 1536], + num_classes=150, + ), + auxiliary_head=dict(in_channels=768, num_classes=150), + test_cfg=dict(mode='slide', crop_size=crop_size, stride=(426, 426)), +) + +optimizer = dict( + constructor='LearningRateDecayOptimizerConstructor', + _delete_=True, + type='AdamW', + lr=0.0001, + betas=(0.9, 0.999), + weight_decay=0.05, + paramwise_cfg={ + 'decay_rate': 0.9, + 'decay_type': 'stage_wise', + 'num_layers': 12 + }) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) +# fp16 settings +optimizer_config = dict(type='Fp16OptimizerHook', loss_scale='dynamic') +# fp16 placeholder +fp16 = dict() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k.py new file mode 100644 index 0000000..2e95f3a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/convnext/upernet_convnext_small_fp16_512x512_160k_ade20k.py @@ -0,0 +1,54 @@ +_base_ = [ + '../_base_/models/upernet_convnext.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +checkpoint_file = 
'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-small_3rdparty_32xb128-noema_in1k_20220301-303e75e3.pth' # noqa +model = dict( + backbone=dict( + type='mmcls.ConvNeXt', + arch='small', + out_indices=[0, 1, 2, 3], + drop_path_rate=0.3, + layer_scale_init_value=1.0, + gap_before_final_norm=False, + init_cfg=dict( + type='Pretrained', checkpoint=checkpoint_file, + prefix='backbone.')), + decode_head=dict( + in_channels=[96, 192, 384, 768], + num_classes=150, + ), + auxiliary_head=dict(in_channels=384, num_classes=150), + test_cfg=dict(mode='slide', crop_size=crop_size, stride=(341, 341)), +) + +optimizer = dict( + constructor='LearningRateDecayOptimizerConstructor', + _delete_=True, + type='AdamW', + lr=0.0001, + betas=(0.9, 0.999), + weight_decay=0.05, + paramwise_cfg={ + 'decay_rate': 0.9, + 'decay_type': 'stage_wise', + 'num_layers': 12 + }) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) +# fp16 settings +optimizer_config = dict(type='Fp16OptimizerHook', loss_scale='dynamic') +# fp16 placeholder +fp16 = dict() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k.py new file mode 100644 index 0000000..35c72a8 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/convnext/upernet_convnext_tiny_fp16_512x512_160k_ade20k.py @@ -0,0 +1,54 @@ +_base_ = [ + '../_base_/models/upernet_convnext.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +crop_size = (512, 512) +checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-tiny_3rdparty_32xb128-noema_in1k_20220301-795e9634.pth' # noqa +model = dict( + backbone=dict( + type='mmcls.ConvNeXt', + arch='tiny', + out_indices=[0, 1, 2, 3], + drop_path_rate=0.4, + layer_scale_init_value=1.0, + gap_before_final_norm=False, + init_cfg=dict( + type='Pretrained', checkpoint=checkpoint_file, + prefix='backbone.')), + decode_head=dict( + in_channels=[96, 192, 384, 768], + num_classes=150, + ), + auxiliary_head=dict(in_channels=384, num_classes=150), + test_cfg=dict(mode='slide', crop_size=crop_size, stride=(341, 341)), +) + +optimizer = dict( + constructor='LearningRateDecayOptimizerConstructor', + _delete_=True, + type='AdamW', + lr=0.0001, + betas=(0.9, 0.999), + weight_decay=0.05, + paramwise_cfg={ + 'decay_rate': 0.9, + 'decay_type': 'stage_wise', + 'num_layers': 6 + }) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) +# fp16 settings +optimizer_config = dict(type='Fp16OptimizerHook', loss_scale='dynamic') +# fp16 placeholder +fp16 = dict() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k.py new file mode 100644 index 0000000..0e2f38e --- /dev/null +++ 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/convnext/upernet_convnext_xlarge_fp16_640x640_160k_ade20k.py @@ -0,0 +1,55 @@ +_base_ = [ + '../_base_/models/upernet_convnext.py', + '../_base_/datasets/ade20k_640x640.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +crop_size = (640, 640) +checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-xlarge_3rdparty_in21k_20220301-08aa5ddc.pth' # noqa +model = dict( + backbone=dict( + type='mmcls.ConvNeXt', + arch='xlarge', + out_indices=[0, 1, 2, 3], + drop_path_rate=0.4, + layer_scale_init_value=1.0, + gap_before_final_norm=False, + init_cfg=dict( + type='Pretrained', checkpoint=checkpoint_file, + prefix='backbone.')), + decode_head=dict( + in_channels=[256, 512, 1024, 2048], + num_classes=150, + ), + auxiliary_head=dict(in_channels=1024, num_classes=150), + test_cfg=dict(mode='slide', crop_size=crop_size, stride=(426, 426)), +) + +optimizer = dict( + constructor='LearningRateDecayOptimizerConstructor', + _delete_=True, + type='AdamW', + lr=0.00008, + betas=(0.9, 0.999), + weight_decay=0.05, + paramwise_cfg={ + 'decay_rate': 0.9, + 'decay_type': 'stage_wise', + 'num_layers': 12 + }) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) +# fp16 settings +optimizer_config = dict(type='Fp16OptimizerHook', loss_scale='dynamic') +# fp16 placeholder +fp16 = dict() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/README.md new file mode 100644 index 0000000..ac76340 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/README.md @@ -0,0 +1,67 @@ +# DANet + +[Dual Attention Network for Scene Segmentation](https://arxiv.org/abs/1809.02983) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +In this paper, we address the scene segmentation task by capturing rich contextual dependencies based on the self-attention mechanism. Unlike previous works that capture contexts by multi-scale features fusion, we propose a Dual Attention Network (DANet) to adaptively integrate local features with their global dependencies. Specifically, we append two types of attention modules on top of traditional dilated FCN, which model the semantic interdependencies in spatial and channel dimensions respectively. The position attention module selectively aggregates the features at each position by a weighted sum of the features at all positions. Similar features would be related to each other regardless of their distances. Meanwhile, the channel attention module selectively emphasizes interdependent channel maps by integrating associated features among all channel maps. We sum the outputs of the two attention modules to further improve feature representation which contributes to more precise segmentation results. We achieve new state-of-the-art segmentation performance on three challenging scene segmentation datasets, i.e., Cityscapes, PASCAL Context and COCO Stuff dataset. In particular, a Mean IoU score of 81.5% on Cityscapes test set is achieved without using coarse data. We make the code and trained model publicly available at [this https URL](https://github.com/junfu1115/DANet). + + + +
+ +
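+To make the attention mechanism concrete before the results, here is a simplified sketch of the position attention module described in the abstract (an illustrative addition, not the repo's `DAHead` implementation; shapes in comments assume a `B x C x H x W` input):
+
+```python
+import torch
+import torch.nn as nn
+
+class PositionAttention(nn.Module):
+    """Aggregates each position as a weighted sum of the features at all positions."""
+
+    def __init__(self, in_channels, reduction=8):
+        super().__init__()
+        self.query = nn.Conv2d(in_channels, in_channels // reduction, 1)
+        self.key = nn.Conv2d(in_channels, in_channels // reduction, 1)
+        self.value = nn.Conv2d(in_channels, in_channels, 1)
+        self.gamma = nn.Parameter(torch.zeros(1))  # learned residual weight
+
+    def forward(self, x):
+        b, c, h, w = x.shape
+        q = self.query(x).flatten(2).transpose(1, 2)  # B x HW x C/r
+        k = self.key(x).flatten(2)                    # B x C/r x HW
+        attn = torch.softmax(q @ k, dim=-1)           # B x HW x HW similarities
+        v = self.value(x).flatten(2)                  # B x C x HW
+        out = (v @ attn.transpose(1, 2)).reshape(b, c, h, w)
+        return self.gamma * out + x                   # residual connection
+```
+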
+ +## Citation + +```bibtex +@inproceedings{fu2018dual, + title={Dual Attention Network for Scene Segmentation}, + author={Fu, Jun and Liu, Jing and Tian, Haijie and Li, Yong and Bao, Yongjun and Fang, Zhiwei and Lu, Hanqing}, + booktitle={The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, + year={2019} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------- | ------ | -------- | +| DANet | R-50-D8 | 512x1024 | 40000 | 7.4 | 2.66 | 78.74 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r50-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x1024_40k_cityscapes/danet_r50-d8_512x1024_40k_cityscapes_20200605_191324-c0dbfa5f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x1024_40k_cityscapes/danet_r50-d8_512x1024_40k_cityscapes_20200605_191324.log.json) | +| DANet | R-101-D8 | 512x1024 | 40000 | 10.9 | 1.99 | 80.52 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r101-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x1024_40k_cityscapes/danet_r101-d8_512x1024_40k_cityscapes_20200605_200831-c57a7157.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x1024_40k_cityscapes/danet_r101-d8_512x1024_40k_cityscapes_20200605_200831.log.json) | +| DANet | R-50-D8 | 769x769 | 40000 | 8.8 | 1.56 | 78.88 | 80.62 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r50-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_769x769_40k_cityscapes/danet_r50-d8_769x769_40k_cityscapes_20200530_025703-76681c60.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_769x769_40k_cityscapes/danet_r50-d8_769x769_40k_cityscapes_20200530_025703.log.json) | +| DANet | R-101-D8 | 769x769 | 40000 | 12.8 | 1.07 | 79.88 | 81.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r101-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_769x769_40k_cityscapes/danet_r101-d8_769x769_40k_cityscapes_20200530_025717-dcb7fd4e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_769x769_40k_cityscapes/danet_r101-d8_769x769_40k_cityscapes_20200530_025717.log.json) | +| DANet | R-50-D8 | 512x1024 | 80000 | - | - | 79.34 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r50-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x1024_80k_cityscapes/danet_r50-d8_512x1024_80k_cityscapes_20200607_133029-2bfa2293.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x1024_80k_cityscapes/danet_r50-d8_512x1024_80k_cityscapes_20200607_133029.log.json) | +| DANet | R-101-D8 | 512x1024 | 80000 | - | - | 80.41 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x1024_80k_cityscapes/danet_r101-d8_512x1024_80k_cityscapes_20200607_132918-955e6350.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x1024_80k_cityscapes/danet_r101-d8_512x1024_80k_cityscapes_20200607_132918.log.json) | +| DANet | R-50-D8 | 769x769 | 80000 | - | - | 79.27 | 80.96 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r50-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_769x769_80k_cityscapes/danet_r50-d8_769x769_80k_cityscapes_20200607_132954-495689b4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_769x769_80k_cityscapes/danet_r50-d8_769x769_80k_cityscapes_20200607_132954.log.json) | +| DANet | R-101-D8 | 769x769 | 80000 | - | - | 80.47 | 82.02 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r101-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_769x769_80k_cityscapes/danet_r101-d8_769x769_80k_cityscapes_20200607_132918-f3a929e7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_769x769_80k_cityscapes/danet_r101-d8_769x769_80k_cityscapes_20200607_132918.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DANet | R-50-D8 | 512x512 | 80000 | 11.5 | 21.20 | 41.66 | 42.90 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r50-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_80k_ade20k/danet_r50-d8_512x512_80k_ade20k_20200615_015125-edb18e08.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_80k_ade20k/danet_r50-d8_512x512_80k_ade20k_20200615_015125.log.json) | +| DANet | R-101-D8 | 512x512 | 80000 | 15 | 14.18 | 43.64 | 45.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r101-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_80k_ade20k/danet_r101-d8_512x512_80k_ade20k_20200615_015126-d0357c73.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_80k_ade20k/danet_r101-d8_512x512_80k_ade20k_20200615_015126.log.json) | +| DANet | R-50-D8 | 512x512 | 160000 | - | - | 42.45 | 43.25 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r50-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_160k_ade20k/danet_r50-d8_512x512_160k_ade20k_20200616_082340-9cb35dcd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_160k_ade20k/danet_r50-d8_512x512_160k_ade20k_20200616_082340.log.json) | +| DANet | R-101-D8 | 512x512 | 160000 | - | - | 44.17 | 45.02 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_160k_ade20k/danet_r101-d8_512x512_160k_ade20k_20200616_082348-23bf12f9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_160k_ade20k/danet_r101-d8_512x512_160k_ade20k_20200616_082348.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DANet | R-50-D8 | 512x512 | 20000 | 6.5 | 20.94 | 74.45 | 75.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r50-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_20k_voc12aug/danet_r50-d8_512x512_20k_voc12aug_20200618_070026-9e9e3ab3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_20k_voc12aug/danet_r50-d8_512x512_20k_voc12aug_20200618_070026.log.json) | +| DANet | R-101-D8 | 512x512 | 20000 | 9.9 | 13.76 | 76.02 | 77.23 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r101-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_20k_voc12aug/danet_r101-d8_512x512_20k_voc12aug_20200618_070026-d48d23b2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_20k_voc12aug/danet_r101-d8_512x512_20k_voc12aug_20200618_070026.log.json) | +| DANet | R-50-D8 | 512x512 | 40000 | - | - | 76.37 | 77.29 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r50-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_40k_voc12aug/danet_r50-d8_512x512_40k_voc12aug_20200613_235526-426e3a64.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_40k_voc12aug/danet_r50-d8_512x512_40k_voc12aug_20200613_235526.log.json) | +| DANet | R-101-D8 | 512x512 | 40000 | - | - | 76.51 | 77.32 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet/danet_r101-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_40k_voc12aug/danet_r101-d8_512x512_40k_voc12aug_20200613_223031-788e232a.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_40k_voc12aug/danet_r101-d8_512x512_40k_voc12aug_20200613_223031.log.json) | diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V10.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V10.py new file mode 100644 index 0000000..83bf0a6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V10.py @@ -0,0 +1,199 @@ +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + type='ResNetV1c', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DAHead', + in_channels=2048, + in_index=3, + channels=512, + pam_channels=64, + dropout_ratio=0.1, + num_classes=5,# Need to change + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=5, # Need to change + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + train_cfg=dict(), + test_cfg=dict(mode='whole')) +dataset_type = 'CustomDataset' +data_root = 'data/my_dataset_v10' +img_norm_cfg = dict( + mean=[129.24574653, 114.02886291, 100.29403737], + std=[45.12454885, 45.51509298, 47.70796596], + to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[129.24574653, 114.02886291, 100.29403737], + std=[45.12454885, 45.51509298, 47.70796596], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[129.24574653, 114.02886291, 100.29403737], + std=[45.12454885, 45.51509298, 47.70796596], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='CustomDataset', + data_root='data/my_dataset_v10', + img_dir='img_dir/train', + ann_dir='ann_dir/train', + pipeline=[ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[129.24574653, 114.02886291, 100.29403737], + std=[45.12454885, 45.51509298, 47.70796596], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', 
keys=['img', 'gt_semantic_seg']) + ]), + val=dict( + type='CustomDataset', + data_root='data/my_dataset_v10', + img_dir='img_dir/val', + ann_dir='ann_dir/val', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[129.24574653, 114.02886291, 100.29403737], + std=[45.12454885, 45.51509298, 47.70796596], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ]), + test=dict( + type='CustomDataset', + data_root='data/my_dataset_v10', + img_dir='img_dir/test', + ann_dir='ann_dir/test', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[129.24574653, 114.02886291, 100.29403737], + std=[45.12454885, 45.51509298, 47.70796596], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ], + split=None, + img_suffix='.png', + seg_map_suffix='.png', + classes=('background', 'black', 'brown', 'rainbow', 'silver'), + palette=[[0, 0, 0], [1, 1, 1], [2, 2, 2], [3, 3, 3], [4, 4, 4]])) +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1), ('val', 1)] +cudnn_benchmark = True +optimizer = dict( + type='AdamW', + lr=3e-05, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys=dict( + pos_block=dict(decay_mult=0.0), + norm=dict(decay_mult=0.0), + head=dict(lr_mult=10.0)))) +optimizer_config = dict() +lr_config = dict( + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-06, + power=1.0, + min_lr=0.0, + by_epoch=False) +runner = dict(type='EpochBasedRunner', max_epochs=200) +checkpoint_config = dict(by_epoch=True, interval=1) +evaluation = dict(by_epoch=True, interval=1, metric='mIoU') +log_config = dict( + interval=1000, + hooks=[ + dict(type='TextLoggerHook'), + dict( + type='WandbLoggerHook', + init_kwargs=dict( + project='Oil_Spill', + name='V10_USE_V7_val_test')) + ]) +auto_resume = False +work_dir = 'work_dirs/V10_USE_V7_val_test' +gpu_ids = [0] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V11.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V11.py new file mode 100644 index 0000000..7fddbef --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V11.py @@ -0,0 +1,205 @@ +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + type='ResNetV1c', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DAHead', + #ignore_index=0,  # Uncomment to ignore this label index in the loss calculation + in_channels=2048, + in_index=3, + channels=512, + pam_channels=64, + dropout_ratio=0.1, + num_classes=3, # CHANGED + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0 + #class_weight=[0.0, 1.5, 1.5]  # Uncomment to apply class-balanced loss weights + )), + auxiliary_head=dict( + 
type='FCNHead', + #ignore_index=0,  # Uncomment to ignore this label index in the loss calculation + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=3, # CHANGED + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + train_cfg=dict(), + test_cfg=dict(mode='whole')) +dataset_type = 'CustomDataset' +data_root = 'data/my_dataset_v11' +img_norm_cfg = dict( + mean=[126.44467916, 109.65090088, 94.61724914], + std=[40.52002353, 40.24915998, 41.4407848], + to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[126.44467916, 109.65090088, 94.61724914], + std=[40.52002353, 40.24915998, 41.4407848], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[126.44467916, 109.65090088, 94.61724914], + std=[40.52002353, 40.24915998, 41.4407848], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=1, + train=dict( + type='CustomDataset', + data_root='data/my_dataset_v11', + img_dir='img_dir/train', + ann_dir='ann_dir/train', + pipeline=[ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[126.44467916, 109.65090088, 94.61724914], + std=[40.52002353, 40.24915998, 41.4407848], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) + ]), + val=dict( + type='CustomDataset', + data_root='data/my_dataset_v11', + img_dir='img_dir/val', + ann_dir='ann_dir/val', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[126.44467916, 109.65090088, 94.61724914], + std=[40.52002353, 40.24915998, 41.4407848], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', 
keys=['img']) + ]) + ])) +dist_params = dict(backend='nccl') +log_level = 'INFO' +#load_from = 'work_dirs/danet_r101-d8_512x512_40k_voc12aug_Black_overlap_v6/epoch_45.pth' +load_from = None +resume_from = None +workflow = [('train', 1), ('val', 1)] +cudnn_benchmark = True +optimizer = dict( + type='AdamW', + lr=3e-05, + betas=(0.9, 0.999), + weight_decay=0.01, + ############# Change from here: alternative RMSprop optimizer (commented out) + # type = 'RMSprop', + # lr = 3e-05, + # alpha = 0.99, + # weight_decay = 0 + ################################ + paramwise_cfg=dict( + custom_keys=dict( + pos_block=dict(decay_mult=0.0), + norm=dict(decay_mult=0.0), + head=dict(lr_mult=10.0)))) +optimizer_config = dict() +lr_config = dict( + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-06, + power=1.0, + min_lr=0.0, + by_epoch=False) +runner = dict(type='EpochBasedRunner', max_epochs=200) +checkpoint_config = dict(by_epoch=True, interval=1) +evaluation = dict(by_epoch=True, interval=1, metric='mIoU') +log_config = dict( + interval=2600, # CHANGED + hooks=[ + dict(type='TextLoggerHook'), + dict( + type='WandbLoggerHook', + init_kwargs=dict( + project='Oil_Spill', + name='V11')) + ]) +auto_resume = False +work_dir = 'work_dirs/V11' +gpu_ids = [0] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7.py new file mode 100644 index 0000000..dacfe09 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7.py @@ -0,0 +1,199 @@ +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + type='ResNetV1c', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DAHead', + in_channels=2048, + in_index=3, + channels=512, + pam_channels=64, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + train_cfg=dict(), + test_cfg=dict(mode='whole')) +dataset_type = 'CustomDataset' +data_root = 'data/my_dataset_v7' +img_norm_cfg = dict( + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ 
+ dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/train', + ann_dir='ann_dir/train', + pipeline=[ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) + ]), + val=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/val', + ann_dir='ann_dir/val', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ]), + test=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/test', + ann_dir='ann_dir/test', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ], + split=None, + img_suffix='.png', + seg_map_suffix='.png', + classes=('background', 'black', 'brown', 'rainbow', 'silver'), + palette=[[0,0,0], [0,0,204], [180,180, 180], [255,255,0], [178, 102, 255]])) +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1), ('val', 1)] +cudnn_benchmark = True +optimizer = dict( + type='AdamW', + lr=3e-05, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys=dict( + pos_block=dict(decay_mult=0.0), + norm=dict(decay_mult=0.0), + head=dict(lr_mult=10.0)))) +optimizer_config = dict() +lr_config = dict( + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-06, + power=1.0, + min_lr=0.0, + by_epoch=False) +runner = dict(type='EpochBasedRunner', max_epochs=200) +checkpoint_config = dict(by_epoch=True, interval=1) +evaluation = dict(by_epoch=True, interval=1, metric='mIoU') +log_config = dict( + interval=1000, + hooks=[ + dict(type='TextLoggerHook'), + dict( + type='WandbLoggerHook', + init_kwargs=dict( + project='Oil_Spill', + name='V7_BL_RB_BR_SV_cropoverlap_r101_200epochs')) + ]) +auto_resume = False +work_dir = 'work_dirs/danet_r101_BL_RB_BR_SV_overlap_v7' +gpu_ids = [0] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_RECHECK.py 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_RECHECK.py new file mode 100644 index 0000000..060526f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_RECHECK.py @@ -0,0 +1,199 @@ +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + type='ResNetV1c', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DAHead', + in_channels=2048, + in_index=3, + channels=512, + pam_channels=64, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + train_cfg=dict(), + test_cfg=dict(mode='whole')) +dataset_type = 'CustomDataset' +data_root = 'data/my_dataset_v7' +img_norm_cfg = dict( + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/train', + ann_dir='ann_dir/train', + pipeline=[ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) + ]), + val=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/val', + ann_dir='ann_dir/val', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + 
flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ]), + test=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/test', + ann_dir='ann_dir/test', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ], + split=None, + img_suffix='.png', + seg_map_suffix='.png', + classes=('background', 'black', 'brown', 'rainbow', 'silver'), + palette=[[0,0,0], [0,0,204], [180,180, 180], [255,255,0], [178, 102, 255]])) +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1), ('val', 1)] +cudnn_benchmark = True +optimizer = dict( + type='AdamW', + lr=3e-05, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys=dict( + pos_block=dict(decay_mult=0.0), + norm=dict(decay_mult=0.0), + head=dict(lr_mult=10.0)))) +optimizer_config = dict() +lr_config = dict( + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-06, + power=1.0, + min_lr=0.0, + by_epoch=False) +runner = dict(type='EpochBasedRunner', max_epochs=200) +checkpoint_config = dict(by_epoch=True, interval=1) +evaluation = dict(by_epoch=True, interval=1, metric='mIoU') +log_config = dict( + interval=1000, + hooks=[ + dict(type='TextLoggerHook'), + dict( + type='WandbLoggerHook', + init_kwargs=dict( + project='Oil_Spill', + name='V7_RECHECK')) + ]) +auto_resume = False +work_dir = 'work_dirs/V7_RECHECK' +gpu_ids = [0] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV1.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV1.py new file mode 100644 index 0000000..39a800f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV1.py @@ -0,0 +1,194 @@ +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DAHead', + in_channels=2048, + in_index=3, + channels=512, + pam_channels=64, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + train_cfg=dict(), + test_cfg=dict(mode='whole')) +dataset_type = 'CustomDataset' +data_root = 'data/my_dataset_v7' +img_norm_cfg = dict( + 
mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/train', + ann_dir='ann_dir/train', + pipeline=[ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) + ]), + val=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/val', + ann_dir='ann_dir/val', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ]), + test=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/test', + ann_dir='ann_dir/test', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ])) +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1), ('val', 1)] +cudnn_benchmark = True +optimizer = dict( + type='AdamW', + lr=3e-05, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys=dict( + pos_block=dict(decay_mult=0.0), + norm=dict(decay_mult=0.0), + head=dict(lr_mult=10.0)))) +optimizer_config = dict() +lr_config = dict( + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-06, + power=1.0, 
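+ # Poly decay: lr = base_lr * (1 - iter/max_iter)**power, floored at min_lr; by_epoch=False counts iterations, not epochs.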
+ min_lr=0.0, + by_epoch=False) +runner = dict(type='EpochBasedRunner', max_epochs=200) +checkpoint_config = dict(by_epoch=True, interval=1) +evaluation = dict(by_epoch=True, interval=1, metric='mIoU') +log_config = dict( + interval=1000, + hooks=[ + dict(type='TextLoggerHook'), + dict( + type='WandbLoggerHook', + init_kwargs=dict( + project='Oil_Spill', + name='V7_REV1_BL_RB_BR_SV_cropoverlap_r50_200epochs')) + ]) +auto_resume = False +work_dir = 'work_dirs/V7_REV1' +gpu_ids = [0] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV10.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV10.py new file mode 100644 index 0000000..b8f1a82 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV10.py @@ -0,0 +1,199 @@ +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + type='ResNetV1c', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DAHead', + in_channels=2048, + in_index=3, + channels=512, + pam_channels=64, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + train_cfg=dict(), + test_cfg=dict(mode='whole')) +dataset_type = 'CustomDataset' +data_root = 'data/my_dataset_v7' +img_norm_cfg = dict( + mean=[131.77957926, 117.09519051, 102.20101477], + std=[46.92492361, 47.75973528, 50.72204598], + to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[131.77957926, 117.09519051, 102.20101477], + std=[46.92492361, 47.75973528, 50.72204598], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[131.77957926, 117.09519051, 102.20101477], + std=[46.92492361, 47.75973528, 50.72204598], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/trainv2', + ann_dir='ann_dir/trainv2', + pipeline=[ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + 
dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[131.77957926, 117.09519051, 102.20101477], + std=[46.92492361, 47.75973528, 50.72204598], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) + ]), + val=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/val', + ann_dir='ann_dir/val', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[131.77957926, 117.09519051, 102.20101477], + std=[46.92492361, 47.75973528, 50.72204598], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ]), + test=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/test', + ann_dir='ann_dir/test', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[131.77957926, 117.09519051, 102.20101477], + std=[46.92492361, 47.75973528, 50.72204598], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ], + split=None, + img_suffix='.png', + seg_map_suffix='.png', + classes=('background', 'black', 'brown', 'rainbow', 'silver'), + palette=[[0,0,0], [0,0,204], [180,180, 180], [255,255,0], [178, 102, 255]])) +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1), ('val', 1)] +cudnn_benchmark = True +optimizer = dict( + type='AdamW', + lr=3e-05, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys=dict( + pos_block=dict(decay_mult=0.0), + norm=dict(decay_mult=0.0), + head=dict(lr_mult=10.0)))) +optimizer_config = dict() +lr_config = dict( + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-06, + power=1.0, + min_lr=0.0, + by_epoch=False) +runner = dict(type='EpochBasedRunner', max_epochs=200) +checkpoint_config = dict(by_epoch=True, interval=1) +evaluation = dict(by_epoch=True, interval=1, metric='mIoU') +log_config = dict( + interval=1000, + hooks=[ + dict(type='TextLoggerHook'), + dict( + type='WandbLoggerHook', + init_kwargs=dict( + project='Oil_Spill', + name='V7_REV10')) + ]) +auto_resume = False +work_dir = 'work_dirs/V7_REV10' +gpu_ids = [0] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV11.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV11.py new file mode 100644 index 0000000..96688b6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV11.py @@ -0,0 +1,199 @@ +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + type='ResNetV1c', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DAHead', + in_channels=2048, + in_index=3, + channels=512, + pam_channels=64, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + 
align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + train_cfg=dict(), + test_cfg=dict(mode='whole')) +dataset_type = 'CustomDataset' +data_root = 'data/my_dataset_v7' +img_norm_cfg = dict( + mean=[131.81343698, 116.93745941, 102.12945703], + std=[45.89227257, 46.80291657, 49.74946225], + to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[131.81343698, 116.93745941, 102.12945703], + std=[45.89227257, 46.80291657, 49.74946225], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[131.81343698, 116.93745941, 102.12945703], + std=[45.89227257, 46.80291657, 49.74946225], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/trainv3', + ann_dir='ann_dir/trainv3', + pipeline=[ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[131.81343698, 116.93745941, 102.12945703], + std=[45.89227257, 46.80291657, 49.74946225], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) + ]), + val=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/val', + ann_dir='ann_dir/val', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[131.81343698, 116.93745941, 102.12945703], + std=[45.89227257, 46.80291657, 49.74946225], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ]), + test=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/test', + ann_dir='ann_dir/test', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[131.81343698, 116.93745941, 102.12945703], + std=[45.89227257, 46.80291657, 49.74946225], + to_rgb=True), + dict(type='ImageToTensor', 
keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ], + split=None, + img_suffix='.png', + seg_map_suffix='.png', + classes=('background', 'black', 'brown', 'rainbow', 'silver'), + palette=[[0,0,0], [0,0,204], [180,180, 180], [255,255,0], [178, 102, 255]])) +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1), ('val', 1)] +cudnn_benchmark = True +optimizer = dict( + type='AdamW', + lr=3e-05, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys=dict( + pos_block=dict(decay_mult=0.0), + norm=dict(decay_mult=0.0), + head=dict(lr_mult=10.0)))) +optimizer_config = dict() +lr_config = dict( + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-06, + power=1.0, + min_lr=0.0, + by_epoch=False) +runner = dict(type='EpochBasedRunner', max_epochs=200) +checkpoint_config = dict(by_epoch=True, interval=1) +evaluation = dict(by_epoch=True, interval=1, metric='mIoU') +log_config = dict( + interval=1000, + hooks=[ + dict(type='TextLoggerHook'), + dict( + type='WandbLoggerHook', + init_kwargs=dict( + project='Oil_Spill', + name='V7_REV11')) + ]) +auto_resume = False +work_dir = 'work_dirs/V7_REV11' +gpu_ids = [0] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV12.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV12.py new file mode 100644 index 0000000..123d63c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV12.py @@ -0,0 +1,199 @@ +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + type='ResNetV1c', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DAHead', + in_channels=2048, + in_index=3, + channels=512, + pam_channels=64, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + train_cfg=dict(), + test_cfg=dict(mode='whole')) +dataset_type = 'CustomDataset' +data_root = 'data/my_dataset_v7' +img_norm_cfg = dict( + mean=[131.88845004, 117.18301931, 102.46388292], + std=[47.07905839, 47.90917573, 50.86488438], + to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[131.88845004, 117.18301931, 102.46388292], + std=[47.07905839, 47.90917573, 50.86488438], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + 
flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[131.88845004, 117.18301931, 102.46388292], + std=[47.07905839, 47.90917573, 50.86488438], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/train', + ann_dir='ann_dir/train', + pipeline=[ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[131.88845004, 117.18301931, 102.46388292], + std=[47.07905839, 47.90917573, 50.86488438], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) + ]), + val=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/val', + ann_dir='ann_dir/val', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[131.88845004, 117.18301931, 102.46388292], + std=[47.07905839, 47.90917573, 50.86488438], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ]), + test=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/test', + ann_dir='ann_dir/test', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[131.88845004, 117.18301931, 102.46388292], + std=[47.07905839, 47.90917573, 50.86488438], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ], + split=None, + img_suffix='.png', + seg_map_suffix='.png', + classes=('background', 'black', 'brown', 'rainbow', 'silver'), + palette=[[0,0,0], [0,0,204], [180,180, 180], [255,255,0], [178, 102, 255]])) +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1), ('val', 1)] +cudnn_benchmark = True +optimizer = dict( + type='AdamW', + lr=3e-05, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys=dict( + pos_block=dict(decay_mult=0.0), + norm=dict(decay_mult=0.0), + head=dict(lr_mult=10.0)))) +optimizer_config = dict() +lr_config = dict( + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-06, + power=1.0, + min_lr=0.0, + by_epoch=False) +runner = dict(type='EpochBasedRunner', max_epochs=200) +checkpoint_config = dict(by_epoch=True, interval=1) +evaluation = dict(by_epoch=True, interval=1, metric='mIoU') +log_config = dict( + interval=1000, + hooks=[ + dict(type='TextLoggerHook'), + dict( + type='WandbLoggerHook', + init_kwargs=dict( + project='Oil_Spill', + name='V7_REV12')) + ]) +auto_resume = False +work_dir = 'work_dirs/V7_REV12' +gpu_ids = [0] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV13.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV13.py new file mode 
100644 index 0000000..940eccd --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV13.py @@ -0,0 +1,199 @@ +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + type='ResNetV1c', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DAHead', + in_channels=2048, + in_index=3, + channels=512, + pam_channels=64, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + train_cfg=dict(), + test_cfg=dict(mode='whole')) +dataset_type = 'CustomDataset' +data_root = 'data/my_dataset_v7' +img_norm_cfg = dict( + mean=[131.88845004, 117.18301931, 102.46388292], + std=[47.07905839, 47.90917573, 50.86488438], + to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[131.88845004, 117.18301931, 102.46388292], + std=[47.07905839, 47.90917573, 50.86488438], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[131.88845004, 117.18301931, 102.46388292], + std=[47.07905839, 47.90917573, 50.86488438], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/trainv2', + ann_dir='ann_dir/trainv2', + pipeline=[ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[131.88845004, 117.18301931, 102.46388292], + std=[47.07905839, 47.90917573, 50.86488438], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) + ]), + val=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/val', + ann_dir='ann_dir/val', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), 
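+ # The Normalize step below must reuse the training mean/std; a mismatch would silently depress val mIoU.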
+ dict( + type='Normalize', + mean=[131.88845004, 117.18301931, 102.46388292], + std=[47.07905839, 47.90917573, 50.86488438], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ]), + test=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/test', + ann_dir='ann_dir/test', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[131.88845004, 117.18301931, 102.46388292], + std=[47.07905839, 47.90917573, 50.86488438], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ], + split=None, + img_suffix='.png', + seg_map_suffix='.png', + classes=('background', 'black', 'brown', 'rainbow', 'silver'), + palette=[[0,0,0], [0,0,204], [180,180, 180], [255,255,0], [178, 102, 255]])) +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1), ('val', 1)] +cudnn_benchmark = True +optimizer = dict( + type='AdamW', + lr=3e-05, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys=dict( + pos_block=dict(decay_mult=0.0), + norm=dict(decay_mult=0.0), + head=dict(lr_mult=10.0)))) +optimizer_config = dict() +lr_config = dict( + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-06, + power=1.0, + min_lr=0.0, + by_epoch=False) +runner = dict(type='EpochBasedRunner', max_epochs=200) +checkpoint_config = dict(by_epoch=True, interval=1) +evaluation = dict(by_epoch=True, interval=1, metric='mIoU') +log_config = dict( + interval=1000, + hooks=[ + dict(type='TextLoggerHook'), + dict( + type='WandbLoggerHook', + init_kwargs=dict( + project='Oil_Spill', + name='V7_REV13')) + ]) +auto_resume = False +work_dir = 'work_dirs/V7_REV13' +gpu_ids = [0] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV14.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV14.py new file mode 100644 index 0000000..8bc8836 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV14.py @@ -0,0 +1,199 @@ +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + type='ResNetV1c', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DAHead', + in_channels=2048, + in_index=3, + channels=512, + pam_channels=64, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + train_cfg=dict(), + test_cfg=dict(mode='whole')) +dataset_type = 'CustomDataset' +data_root = 'data/my_dataset_v7' +img_norm_cfg = dict( + mean=[127.76595458, 113.21066312, 100.03779176], + std=[36.10171351, 36.77637351, 38.99034344], + 
to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[127.76595458, 113.21066312, 100.03779176], + std=[36.10171351, 36.77637351, 38.99034344], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[127.76595458, 113.21066312, 100.03779176], + std=[36.10171351, 36.77637351, 38.99034344], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/train', + ann_dir='ann_dir/train', + pipeline=[ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[127.76595458, 113.21066312, 100.03779176], + std=[36.10171351, 36.77637351, 38.99034344], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) + ]), + val=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/val', + ann_dir='ann_dir/val', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[127.76595458, 113.21066312, 100.03779176], + std=[36.10171351, 36.77637351, 38.99034344], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ]), + test=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/test', + ann_dir='ann_dir/test', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[127.76595458, 113.21066312, 100.03779176], + std=[36.10171351, 36.77637351, 38.99034344], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ], + split=None, + img_suffix='.png', + seg_map_suffix='.png', + classes=('background', 'black', 'brown', 'rainbow', 'silver'), + palette=[[0,0,0], [0,0,204], [180,180, 180], [255,255,0], [178, 102, 255]])) +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1), ('val', 1)] +cudnn_benchmark = True +optimizer = dict( + type='AdamW', + lr=3e-05, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys=dict( + pos_block=dict(decay_mult=0.0), + norm=dict(decay_mult=0.0), + head=dict(lr_mult=10.0)))) +optimizer_config = dict() 
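+# An empty optimizer_config leaves mmcv's default OptimizerHook in place (no gradient clipping).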
+lr_config = dict( + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-06, + power=1.0, + min_lr=0.0, + by_epoch=False) +runner = dict(type='EpochBasedRunner', max_epochs=200) +checkpoint_config = dict(by_epoch=True, interval=1) +evaluation = dict(by_epoch=True, interval=1, metric='mIoU') +log_config = dict( + interval=1000, + hooks=[ + dict(type='TextLoggerHook'), + dict( + type='WandbLoggerHook', + init_kwargs=dict( + project='Oil_Spill', + name='V7_REV14')) + ]) +auto_resume = False +work_dir = 'work_dirs/V7_REV14' +gpu_ids = [0] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV15.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV15.py new file mode 100644 index 0000000..eddf3c5 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV15.py @@ -0,0 +1,199 @@ +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + type='ResNetV1c', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DAHead', + in_channels=2048, + in_index=3, + channels=512, + pam_channels=64, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + train_cfg=dict(), + test_cfg=dict(mode='whole')) +dataset_type = 'CustomDataset' +data_root = 'data/my_dataset_v7' +img_norm_cfg = dict( + mean=[131.68739912, 117.09166126, 102.43975852], + std=[46.6559526, 47.47169675, 50.40012985], + to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[131.68739912, 117.09166126, 102.43975852], + std=[46.6559526, 47.47169675, 50.40012985], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[131.68739912, 117.09166126, 102.43975852], + std=[46.6559526, 47.47169675, 50.40012985], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/trainv2', + ann_dir='ann_dir/trainv2', + pipeline=[ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 
512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[131.68739912, 117.09166126, 102.43975852], + std=[46.6559526, 47.47169675, 50.40012985], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) + ]), + val=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/val', + ann_dir='ann_dir/val', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[131.68739912, 117.09166126, 102.43975852], + std=[46.6559526, 47.47169675, 50.40012985], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ]), + test=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/test', + ann_dir='ann_dir/test', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[131.68739912, 117.09166126, 102.43975852], + std=[46.6559526, 47.47169675, 50.40012985], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ], + split=None, + img_suffix='.png', + seg_map_suffix='.png', + classes=('background', 'black', 'brown', 'rainbow', 'silver'), + palette=[[0,0,0], [0,0,204], [180,180, 180], [255,255,0], [178, 102, 255]])) +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1), ('val', 1)] +cudnn_benchmark = True +optimizer = dict( + type='AdamW', + lr=3e-05, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys=dict( + pos_block=dict(decay_mult=0.0), + norm=dict(decay_mult=0.0), + head=dict(lr_mult=10.0)))) +optimizer_config = dict() +lr_config = dict( + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-06, + power=1.0, + min_lr=0.0, + by_epoch=False) +runner = dict(type='EpochBasedRunner', max_epochs=200) +checkpoint_config = dict(by_epoch=True, interval=1) +evaluation = dict(by_epoch=True, interval=1, metric='mIoU') +log_config = dict( + interval=1000, + hooks=[ + dict(type='TextLoggerHook'), + dict( + type='WandbLoggerHook', + init_kwargs=dict( + project='Oil_Spill', + name='V7_REV15')) + ]) +auto_resume = False +work_dir = 'work_dirs/V7_REV15' +gpu_ids = [0] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV16.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV16.py new file mode 100644 index 0000000..7c779bf --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV16.py @@ -0,0 +1,199 @@ +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + type='ResNetV1c', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DAHead', + in_channels=2048, + in_index=3, + channels=512, + pam_channels=64, + dropout_ratio=0.1, + 
num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + train_cfg=dict(), + test_cfg=dict(mode='whole')) +dataset_type = 'CustomDataset' +data_root = 'data/my_dataset_v7' +img_norm_cfg = dict( + mean=[128.94353278, 114.41115685, 99.30756878], + std=[36.28038875, 37.12411786, 39.68638399], + to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[128.94353278, 114.41115685, 99.30756878], + std=[36.28038875, 37.12411786, 39.68638399], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[128.94353278, 114.41115685, 99.30756878], + std=[36.28038875, 37.12411786, 39.68638399], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/train', + ann_dir='ann_dir/train', + pipeline=[ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[128.94353278, 114.41115685, 99.30756878], + std=[36.28038875, 37.12411786, 39.68638399], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) + ]), + val=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/val', + ann_dir='ann_dir/val', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[128.94353278, 114.41115685, 99.30756878], + std=[36.28038875, 37.12411786, 39.68638399], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ]), + test=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/test', + ann_dir='ann_dir/test', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[128.94353278, 114.41115685, 99.30756878], + std=[36.28038875, 37.12411786, 
39.68638399], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ], + split=None, + img_suffix='.png', + seg_map_suffix='.png', + classes=('background', 'black', 'brown', 'rainbow', 'silver'), + palette=[[0,0,0], [0,0,204], [180,180, 180], [255,255,0], [178, 102, 255]])) +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1), ('val', 1)] +cudnn_benchmark = True +optimizer = dict( + type='AdamW', + lr=3e-05, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys=dict( + pos_block=dict(decay_mult=0.0), + norm=dict(decay_mult=0.0), + head=dict(lr_mult=10.0)))) +optimizer_config = dict() +lr_config = dict( + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-06, + power=1.0, + min_lr=0.0, + by_epoch=False) +runner = dict(type='EpochBasedRunner', max_epochs=200) +checkpoint_config = dict(by_epoch=True, interval=1) +evaluation = dict(by_epoch=True, interval=1, metric='mIoU') +log_config = dict( + interval=1000, + hooks=[ + dict(type='TextLoggerHook'), + dict( + type='WandbLoggerHook', + init_kwargs=dict( + project='Oil_Spill', + name='V7_REV16')) + ]) +auto_resume = False +work_dir = 'work_dirs/V7_REV16' +gpu_ids = [0] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV17.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV17.py new file mode 100644 index 0000000..a4d17a7 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV17.py @@ -0,0 +1,199 @@ +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + type='ResNetV1c', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DAHead', + in_channels=2048, + in_index=3, + channels=512, + pam_channels=64, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + train_cfg=dict(), + test_cfg=dict(mode='whole')) +dataset_type = 'CustomDataset' +data_root = 'data/my_dataset_v7' +img_norm_cfg = dict( + mean=[131.96288137, 117.33735249, 102.68881609], + std=[46.9112306, 47.76184599, 50.73116402], + to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[131.96288137, 117.33735249, 102.68881609], + std=[46.9112306, 47.76184599, 50.73116402], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( 
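+ # The wrapper below runs a single 512x512 scale with flipping off, i.e. no test-time augmentation.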
+ type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[131.96288137, 117.33735249, 102.68881609], + std=[46.9112306, 47.76184599, 50.73116402], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/trainv2', + ann_dir='ann_dir/trainv2', + pipeline=[ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[131.96288137, 117.33735249, 102.68881609], + std=[46.9112306, 47.76184599, 50.73116402], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) + ]), + val=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/val', + ann_dir='ann_dir/val', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[131.96288137, 117.33735249, 102.68881609], + std=[46.9112306, 47.76184599, 50.73116402], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ]), + test=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/test', + ann_dir='ann_dir/test', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[131.96288137, 117.33735249, 102.68881609], + std=[46.9112306, 47.76184599, 50.73116402], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ], + split=None, + img_suffix='.png', + seg_map_suffix='.png', + classes=('background', 'black', 'brown', 'rainbow', 'silver'), + palette=[[0,0,0], [0,0,204], [180,180, 180], [255,255,0], [178, 102, 255]])) +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1), ('val', 1)] +cudnn_benchmark = True +optimizer = dict( + type='AdamW', + lr=3e-05, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys=dict( + pos_block=dict(decay_mult=0.0), + norm=dict(decay_mult=0.0), + head=dict(lr_mult=10.0)))) +optimizer_config = dict() +lr_config = dict( + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-06, + power=1.0, + min_lr=0.0, + by_epoch=False) +runner = dict(type='EpochBasedRunner', max_epochs=200) +checkpoint_config = dict(by_epoch=True, interval=1) +evaluation = dict(by_epoch=True, interval=1, metric='mIoU') +log_config = dict( + interval=1000, + hooks=[ + dict(type='TextLoggerHook'), + dict( + type='WandbLoggerHook', + init_kwargs=dict( + project='Oil_Spill', + name='V7_REV17')) + ]) +auto_resume = False +work_dir = 'work_dirs/V7_REV17' +gpu_ids = [0] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV2.py 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV2.py new file mode 100644 index 0000000..25a1fb6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV2.py @@ -0,0 +1,194 @@ +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + type='ResNetV1c', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DAHead', + in_channels=2048, + in_index=3, + channels=512, + pam_channels=64, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + train_cfg=dict(), + test_cfg=dict(mode='whole')) +dataset_type = 'CustomDataset' +data_root = 'data/my_dataset_v7' +img_norm_cfg = dict( + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/train', + ann_dir='ann_dir/train', + pipeline=[ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) + ]), + val=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/val', + ann_dir='ann_dir/val', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, 
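+ # With keep_ratio=True in the Resize below, img_scale=(512, 512) is an upper bound, not an exact output shape.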
+ transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ]), + test=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/test', + ann_dir='ann_dir/test', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ])) +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = 'work_dirs/danet_r101-d8_512x512_40k_voc12aug_Black_overlap_v6/epoch_45.pth' +resume_from = None +workflow = [('train', 1), ('val', 1)] +cudnn_benchmark = True +optimizer = dict( + type='AdamW', + lr=3e-05, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys=dict( + pos_block=dict(decay_mult=0.0), + norm=dict(decay_mult=0.0), + head=dict(lr_mult=10.0)))) +optimizer_config = dict() +lr_config = dict( + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-06, + power=1.0, + min_lr=0.0, + by_epoch=False) +runner = dict(type='EpochBasedRunner', max_epochs=200) +checkpoint_config = dict(by_epoch=True, interval=1) +evaluation = dict(by_epoch=True, interval=1, metric='mIoU') +log_config = dict( + interval=1000, + hooks=[ + dict(type='TextLoggerHook'), + dict( + type='WandbLoggerHook', + init_kwargs=dict( + project='Oil_Spill', + name='V7_REV2_BL_RB_BR_SV_cropoverlap_r101_200epochs_from_black_pretrained')) + ]) +auto_resume = False +work_dir = 'work_dirs/V7_REV2' +gpu_ids = [0] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV3.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV3.py new file mode 100644 index 0000000..b04740e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV3.py @@ -0,0 +1,201 @@ +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + type='ResNetV1c', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DAHead', + in_channels=2048, + in_index=3, + channels=512, + pam_channels=64, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + train_cfg=dict(), + test_cfg=dict(mode='whole')) +dataset_type = 'CustomDataset' +data_root = 'data/my_dataset_v7' +img_norm_cfg = dict( + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + 
to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/train', + ann_dir='ann_dir/train', + pipeline=[ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) + ]), + val=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/val', + ann_dir='ann_dir/val', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ]), + test=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/test', + ann_dir='ann_dir/test', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ])) +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1), ('val', 1)] +cudnn_benchmark = True +optimizer = dict( + ###### Changed from here + # type='AdamW', + # lr=3e-05, + # betas=(0.9, 0.999), + # weight_decay=0.01, + ########################### + type='SGD', + lr = 0.001, + momentum = 0.9, + weight_decay=0.0005, + ########################### + paramwise_cfg=dict( + custom_keys=dict( + pos_block=dict(decay_mult=0.0), + norm=dict(decay_mult=0.0), + head=dict(lr_mult=10.0)))) +optimizer_config = dict() +lr_config = dict( + policy='poly', + 
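
# V7_REV3 swaps AdamW for SGD and keeps the 'poly' schedule declared here.
# Assuming mmcv's PolyLrUpdaterHook semantics (with the linear warmup fields
# configured just below), the per-iteration rule is roughly the following
# sketch; max_iters is illustrative, since this runner is epoch-based:
def poly_lr(it, base_lr=0.001, max_iters=100000, power=1.0, min_lr=0.0,
            warmup_iters=1500, warmup_ratio=1e-06):
    if it < warmup_iters:
        # linear warmup: ramp from base_lr * warmup_ratio up to base_lr
        k = (1 - it / warmup_iters) * (1 - warmup_ratio)
        return base_lr * (1 - k)
    return (base_lr - min_lr) * (1 - it / max_iters) ** power + min_lr
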
warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-06, + power=1.0, + min_lr=0.0, + by_epoch=False) +runner = dict(type='EpochBasedRunner', max_epochs=200) +checkpoint_config = dict(by_epoch=True, interval=1) +evaluation = dict(by_epoch=True, interval=1, metric='mIoU') +log_config = dict( + interval=4188, + hooks=[ + dict(type='TextLoggerHook'), + dict( + type='WandbLoggerHook', + init_kwargs=dict( + project='Oil_Spill', + name='V7_REV3_BL_RB_BR_SV_cropoverlap_r101_200epochs_from_scratch_change_hyperparameter')) + ]) +auto_resume = False +work_dir = 'work_dirs/V7_REV3' +gpu_ids = [0] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV4.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV4.py new file mode 100644 index 0000000..9f96844 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV4.py @@ -0,0 +1,202 @@ +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + type='ResNetV1c', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='PSPHead', + in_channels=2048, + in_index=3, + channels=512, + #pam_channels=64, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + train_cfg=dict(), + test_cfg=dict(mode='whole')) +dataset_type = 'CustomDataset' +data_root = 'data/my_dataset_v7' +img_norm_cfg = dict( + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=1, + train=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/train', + ann_dir='ann_dir/train', + pipeline=[ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', 
img_scale=(512, 512)),
+        dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
+        dict(type='RandomFlip', flip_ratio=0.5),
+        dict(type='PhotoMetricDistortion'),
+        dict(
+            type='Normalize',
+            mean=[132.01150988, 117.50650988, 102.74611112],
+            std=[48.42106271, 49.25131565, 52.27428472],
+            to_rgb=True),
+        dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
+        dict(type='DefaultFormatBundle'),
+        dict(type='Collect', keys=['img', 'gt_semantic_seg'])
+    ]),
+    val=dict(
+        type='CustomDataset',
+        data_root='data/my_dataset_v7',
+        img_dir='img_dir/val',
+        ann_dir='ann_dir/val',
+        pipeline=[
+            dict(type='LoadImageFromFile'),
+            dict(
+                type='MultiScaleFlipAug',
+                img_scale=(512, 512),
+                flip=False,
+                transforms=[
+                    dict(type='Resize', keep_ratio=True),
+                    dict(type='RandomFlip'),
+                    dict(
+                        type='Normalize',
+                        mean=[132.01150988, 117.50650988, 102.74611112],
+                        std=[48.42106271, 49.25131565, 52.27428472],
+                        to_rgb=True),
+                    dict(type='ImageToTensor', keys=['img']),
+                    dict(type='Collect', keys=['img'])
+                ])
+        ]),
+    test=dict(
+        type='CustomDataset',
+        data_root='data/my_dataset_v7',
+        img_dir='img_dir/test',
+        ann_dir='ann_dir/test',
+        pipeline=[
+            dict(type='LoadImageFromFile'),
+            dict(
+                type='MultiScaleFlipAug',
+                img_scale=(512, 512),
+                flip=False,
+                transforms=[
+                    dict(type='Resize', keep_ratio=True),
+                    dict(type='RandomFlip'),
+                    dict(
+                        type='Normalize',
+                        mean=[132.01150988, 117.50650988, 102.74611112],
+                        std=[48.42106271, 49.25131565, 52.27428472],
+                        to_rgb=True),
+                    dict(type='ImageToTensor', keys=['img']),
+                    dict(type='Collect', keys=['img'])
+                ])
+        ]))
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+load_from = None
+resume_from = None
+workflow = [('train', 1), ('val', 1)]
+cudnn_benchmark = True
+optimizer = dict(
+    ###### Changed from here
+    type='AdamW',
+    lr=3e-05,
+    betas=(0.9, 0.999),
+    weight_decay=0.01,
+    ###########################
+    # type='SGD',
+    # lr=0.001,
+    # momentum=0.9,
+    # weight_decay=0.0005,
+    ###########################
+    paramwise_cfg=dict(
+        custom_keys=dict(
+            pos_block=dict(decay_mult=0.0),
+            norm=dict(decay_mult=0.0),
+            head=dict(lr_mult=10.0))))
+optimizer_config = dict()
+lr_config = dict(
+    policy='poly',
+    warmup='linear',
+    warmup_iters=1500,
+    warmup_ratio=1e-06,
+    power=1.0,
+    min_lr=0.0,
+    by_epoch=False)
+runner = dict(type='EpochBasedRunner', max_epochs=200)
+checkpoint_config = dict(by_epoch=True, interval=1)
+evaluation = dict(by_epoch=True, interval=1, metric='mIoU')
+log_config = dict(
+    interval=4188,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        dict(
+            type='WandbLoggerHook',
+            init_kwargs=dict(
+                project='Oil_Spill',
+                name='V7_REV4_BL_RB_BR_SV_cropoverlap_r101_200epochs_from_scratch_PSP_decoder'))
+    ])
+auto_resume = False
+work_dir = 'work_dirs/V7_REV4'
+gpu_ids = [0]
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV5.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV5.py
new file mode 100644
index 0000000..e1ce085
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV5.py
@@ -0,0 +1,8 @@
+_base_ = [
+    '../_base_/models/danet_r50-d8.py',  # ok, keep as-is
+    '../_base_/datasets/CustomDataset.py', '../_base_/default_runtime.py',
+    '../_base_/schedules/schedule_40k.py'
+]
+model = dict(
+    decode_head=dict(num_classes=5), auxiliary_head=dict(num_classes=5))
+# update all paths and file names configured earlier in the _base_ files referenced above
\ No newline at end of file
diff --git
a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV6.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV6.py new file mode 100644 index 0000000..c59990c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV6.py @@ -0,0 +1,205 @@ +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + type='ResNetV1c', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DAHead', + in_channels=2048, + in_index=3, + channels=512, + pam_channels=64, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + train_cfg=dict(), + test_cfg=dict(mode='whole')) +dataset_type = 'CustomDataset' +data_root = 'data/my_dataset_v7' +img_norm_cfg = dict( + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/train', + ann_dir='ann_dir/train', + pipeline=[ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) + ]), + val=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/val', + ann_dir='ann_dir/val', + pipeline=[ + dict(type='LoadImageFromFile'), 
+ dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ]), + test=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/test', + ann_dir='ann_dir/test', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ], + split=None, + img_suffix='.png', + seg_map_suffix='.png', + classes=('background', 'black', 'brown', 'rainbow', 'silver'), + palette=[[0,0,0], [0,0,204], [180,180, 180], [255,255,0], [178, 102, 255]])) +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1), ('val', 1)] +cudnn_benchmark = True +optimizer = dict( + type='AdamW', + lr=3e-05, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys=dict( + pos_block=dict(decay_mult=0.0), + norm=dict(decay_mult=0.0), + head=dict(lr_mult=10.0)))) +optimizer_config = dict() +lr_config = dict( + policy='CosineAnnealing', + warmup='linear', + warmup_iters=1000, + warmup_ratio=1.0/10, + #power=1.0, + min_lr=1e-5, + by_epoch=True) +momentum_config = dict( + policy='cyclic', + target_ratio=(0.85 / 0.95, 1), + cyclic_times=1, + step_ratio_up=0.4, +) +runner = dict(type='EpochBasedRunner', max_epochs=200) +checkpoint_config = dict(by_epoch=True, interval=1) +evaluation = dict(by_epoch=True, interval=1, metric='mIoU') +log_config = dict( + interval=1000, + hooks=[ + dict(type='TextLoggerHook'), + dict( + type='WandbLoggerHook', + init_kwargs=dict( + project='Oil_Spill', + name='V7_REV6')) + ]) +auto_resume = False +work_dir = 'work_dirs/V7_REV6' +gpu_ids = [0] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV7.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV7.py new file mode 100644 index 0000000..5fdae64 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV7.py @@ -0,0 +1,195 @@ +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + type='ResNetV1c', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DAHead', + in_channels=2048, + in_index=3, + channels=512, + pam_channels=64, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', 
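
# V7_REV6 above switches the schedule to cosine annealing (by_epoch=True,
# warmup_ratio=1.0/10, min_lr=1e-5) with a cyclic momentum counterpart.
# Assuming mmcv's CosineAnnealingLrUpdaterHook rule, the post-warmup curve is
# roughly this sketch:
import math

def cosine_lr(epoch, base_lr=3e-05, max_epochs=200, min_lr=1e-05):
    return min_lr + 0.5 * (base_lr - min_lr) * (
        1 + math.cos(math.pi * epoch / max_epochs))
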
use_sigmoid=False, loss_weight=0.4)), + train_cfg=dict(), + test_cfg=dict(mode='whole')) +dataset_type = 'CustomDataset' +data_root = 'data/my_dataset_v7' +img_norm_cfg = dict( + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/train', + ann_dir='ann_dir/train', + pipeline=[ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) + ]), + val=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/val', + ann_dir='ann_dir/val', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ]), + test=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/test', + ann_dir='ann_dir/test', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ])) +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1), ('val', 1)] +cudnn_benchmark = True +optimizer = dict( + type='AdamW', + lr=3e-05, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys=dict( + pos_block=dict(decay_mult=0.0), + 
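# custom_keys note (assuming mmcv's DefaultOptimizerConstructor): a parameter
# whose name contains one of these keys gets its weight decay scaled by
# decay_mult and its lr by lr_mult, so norm layers take no weight decay and
# both heads train at 10x the base learning rate.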
norm=dict(decay_mult=0.0), + head=dict(lr_mult=10.0)))) +optimizer_config = dict() +lr_config = dict( + policy='poly', + warmup='linear', + warmup_iters=500, + warmup_ratio=1e-06, + power=1.0, + min_lr=0.0, + by_epoch=False) + +runner = dict(type='EpochBasedRunner', max_epochs=200) +checkpoint_config = dict(by_epoch=True, interval=1) +evaluation = dict(by_epoch=True, interval=1, metric='mIoU') +log_config = dict( + interval=1000, + hooks=[ + dict(type='TextLoggerHook'), + dict( + type='WandbLoggerHook', + init_kwargs=dict( + project='Oil_Spill', + name='V7_REV7')) + ]) +auto_resume = False +work_dir = 'work_dirs/V7_REV7' +gpu_ids = [0] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV8.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV8.py new file mode 100644 index 0000000..34a2e1e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV8.py @@ -0,0 +1,194 @@ +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained = None, + backbone=dict( + type='ResNetV1c', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DAHead', + in_channels=2048, + in_index=3, + channels=512, + pam_channels=64, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + train_cfg=dict(), + test_cfg=dict(mode='whole')) +dataset_type = 'CustomDataset' +data_root = 'data/my_dataset_v7' +img_norm_cfg = dict( + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/train', + ann_dir='ann_dir/train', + pipeline=[ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', 
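
# V7_REV8 (just above) sets pretrained=None, i.e. a pure from-scratch run,
# whereas the other revisions initialize the backbone from the
# 'open-mmlab://resnet101_v1c' ImageNet weights. Warm-starting a built
# segmentor from a local checkpoint instead (the epoch_45.pth that V7_REV2
# points at via load_from) would look roughly like this sketch, with `model`
# built as in the earlier example:
from mmcv.runner import load_checkpoint

load_checkpoint(
    model,
    'work_dirs/danet_r101-d8_512x512_40k_voc12aug_Black_overlap_v6/epoch_45.pth',
    map_location='cpu')
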
img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) + ]), + val=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/val', + ann_dir='ann_dir/val', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ]), + test=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/test', + ann_dir='ann_dir/test', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[132.01150988, 117.50650988, 102.74611112], + std=[48.42106271, 49.25131565, 52.27428472], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ])) +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1), ('val', 1)] +cudnn_benchmark = True +optimizer = dict( + type='AdamW', + lr=3e-05, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys=dict( + pos_block=dict(decay_mult=0.0), + norm=dict(decay_mult=0.0), + head=dict(lr_mult=10.0)))) +optimizer_config = dict() +lr_config = dict( + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-06, + power=1.0, + min_lr=0.0, + by_epoch=False) +runner = dict(type='EpochBasedRunner', max_epochs=200) +checkpoint_config = dict(by_epoch=True, interval=1) +evaluation = dict(by_epoch=True, interval=1, metric='mIoU') +log_config = dict( + interval=1000, + hooks=[ + dict(type='TextLoggerHook'), + dict( + type='WandbLoggerHook', + init_kwargs=dict( + project='Oil_Spill', + name='V7_REV8')) + ]) +auto_resume = False +work_dir = 'work_dirs/V7_REV8' +gpu_ids = [0] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV9.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV9.py new file mode 100644 index 0000000..84e6bbc --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_REV9.py @@ -0,0 +1,199 @@ +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + type='ResNetV1c', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DAHead', + in_channels=2048, + in_index=3, + channels=512, + pam_channels=64, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', 
use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + train_cfg=dict(), + test_cfg=dict(mode='whole')) +dataset_type = 'CustomDataset' +data_root = 'data/my_dataset_v7' +img_norm_cfg = dict( + mean=[131.73912267, 116.75943868, 101.58983944], + std=[43.77987289, 44.80780042, 47.87915058], + to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[131.73912267, 116.75943868, 101.58983944], + std=[43.77987289, 44.80780042, 47.87915058], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[131.73912267, 116.75943868, 101.58983944], + std=[43.77987289, 44.80780042, 47.87915058], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/train', + ann_dir='ann_dir/train', + pipeline=[ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[131.73912267, 116.75943868, 101.58983944], + std=[43.77987289, 44.80780042, 47.87915058], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) + ]), + val=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/val', + ann_dir='ann_dir/val', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[131.73912267, 116.75943868, 101.58983944], + std=[43.77987289, 44.80780042, 47.87915058], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ]), + test=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/test', + ann_dir='ann_dir/test', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[131.73912267, 116.75943868, 101.58983944], + std=[43.77987289, 44.80780042, 47.87915058], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ], + split=None, 
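
# V7_REV9 swaps in recomputed channel statistics (its img_norm_cfg mean/std
# differ from the earlier V7 revisions). A sketch of how such per-channel
# values are typically derived from the training split (paths and suffix are
# illustrative):
import glob

import cv2
import numpy as np

pixels = np.concatenate([
    cv2.imread(p)[:, :, ::-1].reshape(-1, 3).astype(np.float64)  # BGR -> RGB
    for p in glob.glob('data/my_dataset_v7/img_dir/train/*.png')
])
print('mean:', pixels.mean(axis=0), 'std:', pixels.std(axis=0))
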
+ img_suffix='.png', + seg_map_suffix='.png', + classes=('background', 'black', 'brown', 'rainbow', 'silver'), + palette=[[0,0,0], [0,0,204], [180,180, 180], [255,255,0], [178, 102, 255]])) +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1), ('val', 1)] +cudnn_benchmark = True +optimizer = dict( + type='AdamW', + lr=3e-05, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys=dict( + pos_block=dict(decay_mult=0.0), + norm=dict(decay_mult=0.0), + head=dict(lr_mult=10.0)))) +optimizer_config = dict() +lr_config = dict( + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-06, + power=1.0, + min_lr=0.0, + by_epoch=False) +runner = dict(type='EpochBasedRunner', max_epochs=200) +checkpoint_config = dict(by_epoch=True, interval=1) +evaluation = dict(by_epoch=True, interval=1, metric='mIoU') +log_config = dict( + interval=1000, + hooks=[ + dict(type='TextLoggerHook'), + dict( + type='WandbLoggerHook', + init_kwargs=dict( + project='Oil_Spill', + name='V7_REV9')) + ]) +auto_resume = False +work_dir = 'work_dirs/V7_REV9' +gpu_ids = [0] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_SPECIAL.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_SPECIAL.py new file mode 100644 index 0000000..3bec490 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V7_SPECIAL.py @@ -0,0 +1,200 @@ +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + type='ResNetV1c', + depth=101, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=dict(type='SyncBN', requires_grad=True), + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DAHead', + in_channels=2048, + in_index=3, + channels=512, + pam_channels=64, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type='SyncBN', requires_grad=True), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + train_cfg=dict(), + test_cfg=dict(mode='whole')) +dataset_type = 'CustomDataset' +data_root = 'data/my_dataset_v7' +img_norm_cfg = dict( + mean=[120.33951042, 108.19922369, 97.50304326], + std=[57.72659528, 54.49106094, 54.09604289], + to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[120.33951042, 108.19922369, 97.50304326], + std=[57.72659528, 54.49106094, 54.09604289], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + 
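
# The configs that enumerate classes pin a fixed RGB palette for the five
# oil-appearance classes (background, black, brown, rainbow, silver); mmseg
# uses it when rendering predictions. Applying it by hand is a one-line numpy
# lookup (sketch):
import numpy as np

PALETTE = np.array([[0, 0, 0], [0, 0, 204], [180, 180, 180],
                    [255, 255, 0], [178, 102, 255]], dtype=np.uint8)

def colorize(label_map):
    return PALETTE[label_map]  # (H, W) class ids -> (H, W, 3) RGB image
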
dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[120.33951042, 108.19922369, 97.50304326], + std=[57.72659528, 54.49106094, 54.09604289], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + train=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/train', + ann_dir='ann_dir/train', + pipeline=[ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(512, 512)), + dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[120.33951042, 108.19922369, 97.50304326], + std=[57.72659528, 54.49106094, 54.09604289], + to_rgb=True), + dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) + ]), + val=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/val', + ann_dir='ann_dir/val', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[120.33951042, 108.19922369, 97.50304326], + std=[57.72659528, 54.49106094, 54.09604289], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ]), + test=dict( + type='CustomDataset', + data_root='data/my_dataset_v7', + img_dir='img_dir/test', + ann_dir='ann_dir/test', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[120.33951042, 108.19922369, 97.50304326], + std=[57.72659528, 54.49106094, 54.09604289], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ], + split=None, + img_suffix='.png', + seg_map_suffix='.png', + classes=('background', 'black', 'brown', 'rainbow', 'silver'), + palette=[[0,0,0], [0,0,204], [180,180, 180], [255,255,0], [178, 102, 255]])) +dist_params = dict(backend='nccl') +log_level = 'INFO' +#load_from = 'work_dirs/V7/epoch_49.pth' +load_from = None +resume_from = None +workflow = [('train', 1), ('val', 1)] +cudnn_benchmark = True +optimizer = dict( + type='AdamW', + lr=3e-05, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys=dict( + pos_block=dict(decay_mult=0.0), + norm=dict(decay_mult=0.0), + head=dict(lr_mult=10.0)))) +optimizer_config = dict() +lr_config = dict( + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-06, + power=1.0, + min_lr=0.0, + by_epoch=False) +runner = dict(type='EpochBasedRunner', max_epochs=200) +checkpoint_config = dict(by_epoch=True, interval=1) +evaluation = dict(by_epoch=True, interval=1, metric='mIoU') +log_config = dict( + interval=1000, + hooks=[ + dict(type='TextLoggerHook'), + dict( + type='WandbLoggerHook', + init_kwargs=dict( + project='Oil_Spill', + name='V7_SPECIAL')) + ]) +auto_resume = False +work_dir = 'work_dirs/V7_SPECIAL' +gpu_ids = [0] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V8.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V8.py new file mode 100644 index 0000000..82d2b11 --- 
/dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V8.py
@@ -0,0 +1,205 @@
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    pretrained='open-mmlab://resnet101_v1c',
+    backbone=dict(
+        type='ResNetV1c',
+        depth=101,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        dilations=(1, 1, 2, 4),
+        strides=(1, 2, 1, 1),
+        norm_cfg=dict(type='SyncBN', requires_grad=True),
+        norm_eval=False,
+        style='pytorch',
+        contract_dilation=True),
+    decode_head=dict(
+        type='DAHead',
+        # ignore_index=0,  # uncomment to ignore this label index in the loss calculation
+        in_channels=2048,
+        in_index=3,
+        channels=512,
+        pam_channels=64,
+        dropout_ratio=0.1,
+        num_classes=3,  # CHANGED
+        norm_cfg=dict(type='SyncBN', requires_grad=True),
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0
+            # class_weight=[0.0, 1.5, 1.5]  # uncomment for class-balanced loss weighting
+        )),
+    auxiliary_head=dict(
+        type='FCNHead',
+        # ignore_index=0,  # uncomment to ignore this label index in the loss calculation
+        in_channels=1024,
+        in_index=2,
+        channels=256,
+        num_convs=1,
+        concat_input=False,
+        dropout_ratio=0.1,
+        num_classes=3,  # CHANGED
+        norm_cfg=dict(type='SyncBN', requires_grad=True),
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
+dataset_type = 'CustomDataset'
+data_root = 'data/my_dataset_v8'
+img_norm_cfg = dict(
+    mean=[132.4797778, 120.79307456, 109.92839975],
+    std=[49.47761295, 49.07102224, 51.32513935],
+    to_rgb=True)
+crop_size = (512, 512)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(type='Resize', img_scale=(512, 512)),
+    dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(
+        type='Normalize',
+        mean=[132.4797778, 120.79307456, 109.92839975],
+        std=[49.47761295, 49.07102224, 51.32513935],
+        to_rgb=True),
+    dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(512, 512),
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(
+                type='Normalize',
+                mean=[132.4797778, 120.79307456, 109.92839975],
+                std=[49.47761295, 49.07102224, 51.32513935],
+                to_rgb=True),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img'])
+        ])
+]
+data = dict(
+    samples_per_gpu=2,
+    workers_per_gpu=1,
+    train=dict(
+        type='CustomDataset',
+        data_root='data/my_dataset_v8',
+        img_dir='img_dir/train',
+        ann_dir='ann_dir/train',
+        pipeline=[
+            dict(type='LoadImageFromFile'),
+            dict(type='LoadAnnotations'),
+            dict(type='Resize', img_scale=(512, 512)),
+            dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
+            dict(type='RandomFlip', flip_ratio=0.5),
+            dict(type='PhotoMetricDistortion'),
+            dict(
+                type='Normalize',
+                mean=[132.4797778, 120.79307456, 109.92839975],
+                std=[49.47761295, 49.07102224, 51.32513935],
+                to_rgb=True),
+            dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
+            dict(type='DefaultFormatBundle'),
+            dict(type='Collect', keys=['img', 'gt_semantic_seg'])
+        ]),
+    val=dict(
+        type='CustomDataset',
+        data_root='data/my_dataset_v8',
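
# The commented class_weight / ignore_index knobs in V8 map directly onto
# PyTorch's weighted cross-entropy. A hedged sketch of what enabling
# class_weight=[0.0, 1.5, 1.5] would compute: weight 0.0 silences the
# background class, and ignore_index=255 here mirrors the seg_pad_val=255 that
# the Pad transform assigns to padded pixels:
import torch
import torch.nn as nn

ce = nn.CrossEntropyLoss(weight=torch.tensor([0.0, 1.5, 1.5]), ignore_index=255)
logits = torch.randn(2, 3, 512, 512)         # N x num_classes x H x W
target = torch.randint(0, 3, (2, 512, 512))  # N x H x W class ids
loss = ce(logits, target)
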
+        img_dir='img_dir/val',
+        ann_dir='ann_dir/val',
+        pipeline=[
+            dict(type='LoadImageFromFile'),
+            dict(
+                type='MultiScaleFlipAug',
+                img_scale=(512, 512),
+                flip=False,
+                transforms=[
+                    dict(type='Resize', keep_ratio=True),
+                    dict(type='RandomFlip'),
+                    dict(
+                        type='Normalize',
+                        mean=[132.4797778, 120.79307456, 109.92839975],
+                        std=[49.47761295, 49.07102224, 51.32513935],
+                        to_rgb=True),
+                    dict(type='ImageToTensor', keys=['img']),
+                    dict(type='Collect', keys=['img'])
+                ])
+        ]),
+    test=dict(
+        type='CustomDataset',
+        data_root='data/my_dataset_v8',
+        img_dir='img_dir/test',
+        ann_dir='ann_dir/test',
+        pipeline=[
+            dict(type='LoadImageFromFile'),
+            dict(
+                type='MultiScaleFlipAug',
+                img_scale=(512, 512),
+                flip=False,
+                transforms=[
+                    dict(type='Resize', keep_ratio=True),
+                    dict(type='RandomFlip'),
+                    dict(
+                        type='Normalize',
+                        mean=[132.4797778, 120.79307456, 109.92839975],
+                        std=[49.47761295, 49.07102224, 51.32513935],
+                        to_rgb=True),
+                    dict(type='ImageToTensor', keys=['img']),
+                    dict(type='Collect', keys=['img'])
+                ])
+        ]))
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+#load_from = 'work_dirs/danet_r101-d8_512x512_40k_voc12aug_Black_overlap_v6/epoch_45.pth'
+load_from = None
+resume_from = None
+workflow = [('train', 1), ('val', 1)]
+cudnn_benchmark = True
+optimizer = dict(
+    type='AdamW',
+    lr=3e-05,
+    betas=(0.9, 0.999),
+    weight_decay=0.01,
+    ############# Changed from here
+    # type='RMSprop',
+    # lr=3e-05,
+    # alpha=0.99,
+    # weight_decay=0
+    ################################
+    paramwise_cfg=dict(
+        custom_keys=dict(
+            pos_block=dict(decay_mult=0.0),
+            norm=dict(decay_mult=0.0),
+            head=dict(lr_mult=10.0))))
+optimizer_config = dict()
+lr_config = dict(
+    policy='poly',
+    warmup='linear',
+    warmup_iters=1500,
+    warmup_ratio=1e-06,
+    power=1.0,
+    min_lr=0.0,
+    by_epoch=False)
+runner = dict(type='EpochBasedRunner', max_epochs=200)
+checkpoint_config = dict(by_epoch=True, interval=1)
+evaluation = dict(by_epoch=True, interval=1, metric='mIoU')
+log_config = dict(
+    interval=2600,  # CHANGED
+    hooks=[
+        dict(type='TextLoggerHook'),
+        dict(
+            type='WandbLoggerHook',
+            init_kwargs=dict(
+                project='Oil_Spill',
+                name='V8_REV1'))
+    ])
+auto_resume = False
+work_dir = 'work_dirs/V8_REV1'
+gpu_ids = [0]
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V8_REV2.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V8_REV2.py
new file mode 100644
index 0000000..99982c4
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/V8_REV2.py
@@ -0,0 +1,205 @@
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    pretrained='open-mmlab://resnet101_v1c',
+    backbone=dict(
+        type='ResNetV1c',
+        depth=101,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        dilations=(1, 1, 2, 4),
+        strides=(1, 2, 1, 1),
+        norm_cfg=dict(type='SyncBN', requires_grad=True),
+        norm_eval=False,
+        style='pytorch',
+        contract_dilation=True),
+    decode_head=dict(
+        type='DAHead',
+        # ignore_index=0,  # uncomment to ignore this label index in the loss calculation
+        in_channels=2048,
+        in_index=3,
+        channels=512,
+        pam_channels=64,
+        dropout_ratio=0.1,
+        num_classes=3,  # CHANGED
+        norm_cfg=dict(type='SyncBN', requires_grad=True),
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0
+            # class_weight=[0.0, 1.5, 1.5]  # uncomment for class-balanced loss weighting
+        )),
+    auxiliary_head=dict(
+        type='FCNHead',
+        # ignore_index=0,  # uncomment to ignore this label index in the loss calculation
+        in_channels=1024,
+        in_index=2,
+        channels=256,
+        num_convs=1,
+        concat_input=False,
+        dropout_ratio=0.1,
+        num_classes=3,  # CHANGED
+        norm_cfg=dict(type='SyncBN', requires_grad=True),
+        align_corners=False,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'))
+dataset_type = 'CustomDataset'
+data_root = 'data/my_dataset_v8'
+img_norm_cfg = dict(
+    mean=[132.4797778, 120.79307456, 109.92839975],
+    std=[49.47761295, 49.07102224, 51.32513935],
+    to_rgb=True)
+crop_size = (512, 512)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(type='Resize', img_scale=(512, 512)),
+    dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(
+        type='Normalize',
+        mean=[132.4797778, 120.79307456, 109.92839975],
+        std=[49.47761295, 49.07102224, 51.32513935],
+        to_rgb=True),
+    dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(512, 512),
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(
+                type='Normalize',
+                mean=[132.4797778, 120.79307456, 109.92839975],
+                std=[49.47761295, 49.07102224, 51.32513935],
+                to_rgb=True),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img'])
+        ])
+]
+data = dict(
+    samples_per_gpu=2,
+    workers_per_gpu=1,
+    train=dict(
+        type='CustomDataset',
+        data_root='data/my_dataset_v8',
+        img_dir='img_dir/train',
+        ann_dir='ann_dir/train',
+        pipeline=[
+            dict(type='LoadImageFromFile'),
+            dict(type='LoadAnnotations'),
+            dict(type='Resize', img_scale=(512, 512)),
+            dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
+            dict(type='RandomFlip', flip_ratio=0.5),
+            dict(type='PhotoMetricDistortion'),
+            dict(
+                type='Normalize',
+                mean=[132.4797778, 120.79307456, 109.92839975],
+                std=[49.47761295, 49.07102224, 51.32513935],
+                to_rgb=True),
+            dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
+            dict(type='DefaultFormatBundle'),
+            dict(type='Collect', keys=['img', 'gt_semantic_seg'])
+        ]),
+    val=dict(
+        type='CustomDataset',
+        data_root='data/my_dataset_v8',
+        img_dir='img_dir/val',
+        ann_dir='ann_dir/val',
+        pipeline=[
+            dict(type='LoadImageFromFile'),
+            dict(
+                type='MultiScaleFlipAug',
+                img_scale=(512, 512),
+                flip=False,
+                transforms=[
+                    dict(type='Resize', keep_ratio=True),
+                    dict(type='RandomFlip'),
+                    dict(
+                        type='Normalize',
+                        mean=[132.4797778, 120.79307456, 109.92839975],
+                        std=[49.47761295, 49.07102224, 51.32513935],
+                        to_rgb=True),
+                    dict(type='ImageToTensor', keys=['img']),
+                    dict(type='Collect', keys=['img'])
+                ])
+        ]),
+    test=dict(
+        type='CustomDataset',
+        data_root='data/my_dataset_v8',
+        img_dir='img_dir/test',
+        ann_dir='ann_dir/test',
+        pipeline=[
+            dict(type='LoadImageFromFile'),
+            dict(
+                type='MultiScaleFlipAug',
+                img_scale=(512, 512),
+                flip=False,
+                transforms=[
+                    dict(type='Resize', keep_ratio=True),
+                    dict(type='RandomFlip'),
+                    dict(
+                        type='Normalize',
+                        mean=[132.4797778, 120.79307456, 109.92839975],
+                        std=[49.47761295, 49.07102224, 51.32513935],
+                        to_rgb=True),
+                    dict(type='ImageToTensor', keys=['img']),
+                    dict(type='Collect', keys=['img'])
+                ])
+        ]))
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+load_from = 'work_dirs/danet_r101-d8_512x512_40k_voc12aug_Black_overlap_v6/epoch_45.pth'
+#load_from = None
+resume_from = None
+workflow = [('train', 1), ('val', 1)]
+cudnn_benchmark = True
+optimizer = dict(
+    type='AdamW',
+    lr=3e-05,
+    betas=(0.9, 0.999),
+    weight_decay=0.01,
+    ############# Changed from here
+    # type='RMSprop',
+    # lr=3e-05,
+    # alpha=0.99,
+    # weight_decay=0
+    ################################
+    paramwise_cfg=dict(
+        custom_keys=dict(
+            pos_block=dict(decay_mult=0.0),
+            norm=dict(decay_mult=0.0),
+            head=dict(lr_mult=10.0))))
+optimizer_config = dict()
+lr_config = dict(
+    policy='poly',
+    warmup='linear',
+    warmup_iters=1500,
+    warmup_ratio=1e-06,
+    power=1.0,
+    min_lr=0.0,
+    by_epoch=False)
+runner = dict(type='EpochBasedRunner', max_epochs=200)
+checkpoint_config = dict(by_epoch=True, interval=1)
+evaluation = dict(by_epoch=True, interval=1, metric='mIoU')
+log_config = dict(
+    interval=2600,  # CHANGED
+    hooks=[
+        dict(type='TextLoggerHook'),
+        dict(
+            type='WandbLoggerHook',
+            init_kwargs=dict(
+                project='Oil_Spill',
+                name='V8_REV2'))
+    ])
+auto_resume = False
+work_dir = 'work_dirs/V8_REV2'
+gpu_ids = [0]
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet.yml
new file mode 100644
index 0000000..ca2d6ff
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet.yml
@@ -0,0 +1,301 @@
+Collections:
+- Name: DANet
+  Metadata:
+    Training Data:
+    - Cityscapes
+    - ADE20K
+    - Pascal VOC 2012 + Aug
+  Paper:
+    URL: https://arxiv.org/abs/1809.02983
+    Title: Dual Attention Network for Scene Segmentation
+  README: configs/danet/README.md
+  Code:
+    URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/da_head.py#L76
+    Version: v0.17.0
+  Converted From:
+    Code: https://github.com/junfu1115/DANet/
+Models:
+- Name: danet_r50-d8_512x1024_40k_cityscapes
+  In Collection: DANet
+  Metadata:
+    backbone: R-50-D8
+    crop size: (512,1024)
+    lr schd: 40000
+    inference time (ms/im):
+    - value: 375.94
+      hardware: V100
+      backend: PyTorch
+      batch size: 1
+      mode: FP32
+      resolution: (512,1024)
+    Training Memory (GB): 7.4
+  Results:
+  - Task: Semantic Segmentation
+    Dataset: Cityscapes
+    Metrics:
+      mIoU: 78.74
+  Config: configs/danet/danet_r50-d8_512x1024_40k_cityscapes.py
+  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x1024_40k_cityscapes/danet_r50-d8_512x1024_40k_cityscapes_20200605_191324-c0dbfa5f.pth
+- Name: danet_r101-d8_512x1024_40k_cityscapes
+  In Collection: DANet
+  Metadata:
+    backbone: R-101-D8
+    crop size: (512,1024)
+    lr schd: 40000
+    inference time (ms/im):
+    - value: 502.51
+      hardware: V100
+      backend: PyTorch
+      batch size: 1
+      mode: FP32
+      resolution: (512,1024)
+    Training Memory (GB): 10.9
+  Results:
+  - Task: Semantic Segmentation
+    Dataset: Cityscapes
+    Metrics:
+      mIoU: 80.52
+  Config: configs/danet/danet_r101-d8_512x1024_40k_cityscapes.py
+  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x1024_40k_cityscapes/danet_r101-d8_512x1024_40k_cityscapes_20200605_200831-c57a7157.pth
+- Name: danet_r50-d8_769x769_40k_cityscapes
+  In Collection: DANet
+  Metadata:
+    backbone: R-50-D8
+    crop size: (769,769)
+    lr schd: 40000
+    inference time (ms/im):
+    - value: 641.03
+      hardware: V100
+      backend: PyTorch
+      batch size: 1
+      mode: FP32
+      resolution: (769,769)
+    Training Memory (GB): 8.8
+  Results:
+  - Task: Semantic Segmentation
+    Dataset:
Cityscapes + Metrics: + mIoU: 78.88 + mIoU(ms+flip): 80.62 + Config: configs/danet/danet_r50-d8_769x769_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_769x769_40k_cityscapes/danet_r50-d8_769x769_40k_cityscapes_20200530_025703-76681c60.pth +- Name: danet_r101-d8_769x769_40k_cityscapes + In Collection: DANet + Metadata: + backbone: R-101-D8 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - value: 934.58 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 12.8 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.88 + mIoU(ms+flip): 81.47 + Config: configs/danet/danet_r101-d8_769x769_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_769x769_40k_cityscapes/danet_r101-d8_769x769_40k_cityscapes_20200530_025717-dcb7fd4e.pth +- Name: danet_r50-d8_512x1024_80k_cityscapes + In Collection: DANet + Metadata: + backbone: R-50-D8 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.34 + Config: configs/danet/danet_r50-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x1024_80k_cityscapes/danet_r50-d8_512x1024_80k_cityscapes_20200607_133029-2bfa2293.pth +- Name: danet_r101-d8_512x1024_80k_cityscapes + In Collection: DANet + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.41 + Config: configs/danet/danet_r101-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x1024_80k_cityscapes/danet_r101-d8_512x1024_80k_cityscapes_20200607_132918-955e6350.pth +- Name: danet_r50-d8_769x769_80k_cityscapes + In Collection: DANet + Metadata: + backbone: R-50-D8 + crop size: (769,769) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.27 + mIoU(ms+flip): 80.96 + Config: configs/danet/danet_r50-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_769x769_80k_cityscapes/danet_r50-d8_769x769_80k_cityscapes_20200607_132954-495689b4.pth +- Name: danet_r101-d8_769x769_80k_cityscapes + In Collection: DANet + Metadata: + backbone: R-101-D8 + crop size: (769,769) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.47 + mIoU(ms+flip): 82.02 + Config: configs/danet/danet_r101-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_769x769_80k_cityscapes/danet_r101-d8_769x769_80k_cityscapes_20200607_132918-f3a929e7.pth +- Name: danet_r50-d8_512x512_80k_ade20k + In Collection: DANet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 47.17 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 11.5 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.66 + mIoU(ms+flip): 42.9 + Config: configs/danet/danet_r50-d8_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_80k_ade20k/danet_r50-d8_512x512_80k_ade20k_20200615_015125-edb18e08.pth +- Name: danet_r101-d8_512x512_80k_ade20k + In Collection: DANet + 
Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 70.52 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 15.0 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.64 + mIoU(ms+flip): 45.19 + Config: configs/danet/danet_r101-d8_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_80k_ade20k/danet_r101-d8_512x512_80k_ade20k_20200615_015126-d0357c73.pth +- Name: danet_r50-d8_512x512_160k_ade20k + In Collection: DANet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.45 + mIoU(ms+flip): 43.25 + Config: configs/danet/danet_r50-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_160k_ade20k/danet_r50-d8_512x512_160k_ade20k_20200616_082340-9cb35dcd.pth +- Name: danet_r101-d8_512x512_160k_ade20k + In Collection: DANet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 44.17 + mIoU(ms+flip): 45.02 + Config: configs/danet/danet_r101-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_160k_ade20k/danet_r101-d8_512x512_160k_ade20k_20200616_082348-23bf12f9.pth +- Name: danet_r50-d8_512x512_20k_voc12aug + In Collection: DANet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 47.76 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 6.5 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 74.45 + mIoU(ms+flip): 75.69 + Config: configs/danet/danet_r50-d8_512x512_20k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_20k_voc12aug/danet_r50-d8_512x512_20k_voc12aug_20200618_070026-9e9e3ab3.pth +- Name: danet_r101-d8_512x512_20k_voc12aug + In Collection: DANet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 72.67 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.9 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.02 + mIoU(ms+flip): 77.23 + Config: configs/danet/danet_r101-d8_512x512_20k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_20k_voc12aug/danet_r101-d8_512x512_20k_voc12aug_20200618_070026-d48d23b2.pth +- Name: danet_r50-d8_512x512_40k_voc12aug + In Collection: DANet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.37 + mIoU(ms+flip): 77.29 + Config: configs/danet/danet_r50-d8_512x512_40k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r50-d8_512x512_40k_voc12aug/danet_r50-d8_512x512_40k_voc12aug_20200613_235526-426e3a64.pth +- Name: danet_r101-d8_512x512_40k_voc12aug + In Collection: DANet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.51 + 
mIoU(ms+flip): 77.32 + Config: configs/danet/danet_r101-d8_512x512_40k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/danet/danet_r101-d8_512x512_40k_voc12aug/danet_r101-d8_512x512_40k_voc12aug_20200613_223031-788e232a.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r101-d8_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..3bfb9bd --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './danet_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r101-d8_512x1024_40k_cityscapes_AN.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r101-d8_512x1024_40k_cityscapes_AN.py new file mode 100644 index 0000000..e1ce085 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r101-d8_512x1024_40k_cityscapes_AN.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/danet_r50-d8.py',  # ok, keep as-is + '../_base_/datasets/CustomDataset.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=5), auxiliary_head=dict(num_classes=5)) +# update all the paths and file names configured earlier in this file's _base_ entries \ No newline at end of file diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r101-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..d80b2ec --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './danet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r101-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..0f22d0f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './danet_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r101-d8_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000..709f93c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './danet_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r101-d8_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000..5c623eb --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@
+_base_ = './danet_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r101-d8_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..bd31bc8 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './danet_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r101-d8_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000..597d76d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './danet_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r101-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..70f9b31 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './danet_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r50-d8_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..1b70c5b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/danet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r50-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..0373431 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/danet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r50-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..22aaf85 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/danet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git 
a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r50-d8_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000..010f86f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/danet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r50-d8_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000..0cef0f0 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/danet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r50-d8_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..154e848 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/danet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r50-d8_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000..5c5b94e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/danet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r50-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..c7237ae --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/danet/danet_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/danet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/README.md new 
file mode 100644 index 0000000..4985660 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/README.md @@ -0,0 +1,117 @@ +# DeepLabV3 + +[Rethinking atrous convolution for semantic image segmentation](https://arxiv.org/abs/1706.05587) + +## Introduction + + + +<a href="https://github.com/tensorflow/models/tree/master/research/deeplab">Official Repo</a> + +<a href="https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54">Code Snippet</a> + +## Abstract + + + +In this work, we revisit atrous convolution, a powerful tool to explicitly adjust filter's field-of-view as well as control the resolution of feature responses computed by Deep Convolutional Neural Networks, in the application of semantic image segmentation. To handle the problem of segmenting objects at multiple scales, we design modules which employ atrous convolution in cascade or in parallel to capture multi-scale context by adopting multiple atrous rates. Furthermore, we propose to augment our previously proposed Atrous Spatial Pyramid Pooling module, which probes convolutional features at multiple scales, with image-level features encoding global context and further boost performance. We also elaborate on implementation details and share our experience on training our system. The proposed \`DeepLabv3' system significantly improves over our previous DeepLab versions without DenseCRF post-processing and attains comparable performance with other state-of-art models on the PASCAL VOC 2012 semantic image segmentation benchmark. + + +
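+As a concrete reading of the abstract (an editorial note, not part of the upstream README): a kernel of size k with dilation rate r has an effective extent of k + (k - 1)(r - 1), so a 3x3 kernel at rate 12 covers a 25x25 window while still using only nine weights per input channel. ASPP runs several such kernels at different rates in parallel and fuses the results with image-level features.
+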
+ +
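+A minimal PyTorch sketch of that design (illustrative only; it is not the mmsegmentation implementation, which lives in `mmseg/models/decode_heads/aspp_head.py` and additionally wraps each branch in conv-BN-ReLU blocks):
+
+```python
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+class MiniASPP(nn.Module):
+    # Parallel atrous branches at several rates plus an image-level pooling
+    # branch, roughly following the ASPP design described in the abstract.
+    def __init__(self, in_ch=2048, out_ch=256, rates=(6, 12, 18)):
+        super().__init__()
+        self.branches = nn.ModuleList(
+            [nn.Conv2d(in_ch, out_ch, 1)]                          # 1x1 branch
+            + [nn.Conv2d(in_ch, out_ch, 3, padding=r, dilation=r)  # atrous branches
+               for r in rates])
+        self.image_pool = nn.Sequential(                           # global context
+            nn.AdaptiveAvgPool2d(1), nn.Conv2d(in_ch, out_ch, 1))
+        self.project = nn.Conv2d(out_ch * (len(rates) + 2), out_ch, 1)
+
+    def forward(self, x):
+        feats = [branch(x) for branch in self.branches]
+        pooled = self.image_pool(x)
+        feats.append(F.interpolate(pooled, size=x.shape[2:],
+                                   mode='bilinear', align_corners=False))
+        return self.project(torch.cat(feats, dim=1))
+
+x = torch.randn(1, 2048, 32, 32)  # stride-8 backbone feature map
+print(MiniASPP()(x).shape)        # torch.Size([1, 256, 32, 32])
+```
+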
+ +## Citation + +```bibtex +@article{chen2017rethinking, + title={Rethinking atrous convolution for semantic image segmentation}, + author={Chen, Liang-Chieh and Papandreou, George and Schroff, Florian and Adam, Hartwig}, + journal={arXiv preprint arXiv:1706.05587}, + year={2017} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ---------------- | --------------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------- | ---------------------------------------- | +| DeepLabV3 | R-50-D8 | 512x1024 | 40000 | 6.1 | 2.57 | 79.09 | 80.45 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes/deeplabv3_r50-d8_512x1024_40k_cityscapes_20200605_022449-acadc2f8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes/deeplabv3_r50-d8_512x1024_40k_cityscapes_20200605_022449.log.json) | +| DeepLabV3 | R-101-D8 | 512x1024 | 40000 | 9.6 | 1.92 | 77.12 | 79.61 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes/deeplabv3_r101-d8_512x1024_40k_cityscapes_20200605_012241-7fd3f799.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes/deeplabv3_r101-d8_512x1024_40k_cityscapes_20200605_012241.log.json) | +| DeepLabV3 | R-50-D8 | 769x769 | 40000 | 6.9 | 1.11 | 78.58 | 79.89 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes/deeplabv3_r50-d8_769x769_40k_cityscapes_20200606_113723-7eda553c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes/deeplabv3_r50-d8_769x769_40k_cityscapes_20200606_113723.log.json) | +| DeepLabV3 | R-101-D8 | 769x769 | 40000 | 10.9 | 0.83 | 79.27 | 80.11 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes/deeplabv3_r101-d8_769x769_40k_cityscapes_20200606_113809-c64f889f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes/deeplabv3_r101-d8_769x769_40k_cityscapes_20200606_113809.log.json) | +| DeepLabV3 | R-18-D8 | 512x1024 | 80000 | 1.7 | 13.78 | 76.70 | 78.27 |
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r18-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_512x1024_80k_cityscapes/deeplabv3_r18-d8_512x1024_80k_cityscapes_20201225_021506-23dffbe2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_512x1024_80k_cityscapes/deeplabv3_r18-d8_512x1024_80k_cityscapes-20201225_021506.log.json) | +| DeepLabV3 | R-50-D8 | 512x1024 | 80000 | - | - | 79.32 | 80.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes/deeplabv3_r50-d8_512x1024_80k_cityscapes_20200606_113404-b92cfdd4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes/deeplabv3_r50-d8_512x1024_80k_cityscapes_20200606_113404.log.json) | +| DeepLabV3 | R-101-D8 | 512x1024 | 80000 | - | - | 80.20 | 81.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes/deeplabv3_r101-d8_512x1024_80k_cityscapes_20200606_113503-9e428899.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes/deeplabv3_r101-d8_512x1024_80k_cityscapes_20200606_113503.log.json) | +| DeepLabV3 (FP16) | R-101-D8 | 512x1024 | 80000 | 5.75 | 3.86 | 80.48 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920-774d9cec.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920.log.json) | +| DeepLabV3 | R-18-D8 | 769x769 | 80000 | 1.9 | 5.55 | 76.60 | 78.26 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r18-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_769x769_80k_cityscapes/deeplabv3_r18-d8_769x769_80k_cityscapes_20201225_021506-6452126a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_769x769_80k_cityscapes/deeplabv3_r18-d8_769x769_80k_cityscapes-20201225_021506.log.json) | +| DeepLabV3 | R-50-D8 | 769x769 | 80000 | - | - | 79.89 | 81.06 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes/deeplabv3_r50-d8_769x769_80k_cityscapes_20200606_221338-788d6228.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes/deeplabv3_r50-d8_769x769_80k_cityscapes_20200606_221338.log.json) | +| DeepLabV3 | R-101-D8 | 769x769 | 80000 | - | - | 79.67 | 80.81 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes/deeplabv3_r101-d8_769x769_80k_cityscapes_20200607_013353-60e95418.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes/deeplabv3_r101-d8_769x769_80k_cityscapes_20200607_013353.log.json) | +| DeepLabV3 | R-101-D16-MG124 | 512x1024 | 40000 | 4.7 | 6.96 | 76.71 | 78.63 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes_20200908_005644-67b0c992.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes-20200908_005644.log.json) | +| DeepLabV3 | R-101-D16-MG124 | 512x1024 | 80000 | - | - | 78.36 | 79.84 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes_20200908_005644-57bb8425.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes-20200908_005644.log.json) | +| DeepLabV3 | R-18b-D8 | 512x1024 | 80000 | 1.6 | 13.93 | 76.26 | 77.88 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes/deeplabv3_r18b-d8_512x1024_80k_cityscapes_20201225_094144-46040cef.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes/deeplabv3_r18b-d8_512x1024_80k_cityscapes-20201225_094144.log.json) | +| DeepLabV3 | R-50b-D8 | 512x1024 | 80000 | 6.0 | 2.74 | 79.63 | 80.98 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes/deeplabv3_r50b-d8_512x1024_80k_cityscapes_20201225_155148-ec368954.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes/deeplabv3_r50b-d8_512x1024_80k_cityscapes-20201225_155148.log.json) | +| DeepLabV3 | R-101b-D8 | 512x1024 | 80000 | 9.5 | 1.81 | 80.01 | 81.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes/deeplabv3_r101b-d8_512x1024_80k_cityscapes_20201226_171821-8fd49503.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes/deeplabv3_r101b-d8_512x1024_80k_cityscapes-20201226_171821.log.json) | +| DeepLabV3 | R-18b-D8 | 769x769 | 80000 | 1.8 | 5.79 | 76.63 | 77.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes.py) |
[model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes/deeplabv3_r18b-d8_769x769_80k_cityscapes_20201225_094144-fdc985d9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes/deeplabv3_r18b-d8_769x769_80k_cityscapes-20201225_094144.log.json) | +| DeepLabV3 | R-50b-D8 | 769x769 | 80000 | 6.8 | 1.16 | 78.80 | 80.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes/deeplabv3_r50b-d8_769x769_80k_cityscapes_20201225_155404-87fb0cf4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes/deeplabv3_r50b-d8_769x769_80k_cityscapes-20201225_155404.log.json) | +| DeepLabV3 | R-101b-D8 | 769x769 | 80000 | 10.7 | 0.82 | 79.41 | 80.73 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes/deeplabv3_r101b-d8_769x769_80k_cityscapes_20201226_190843-9142ee57.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes/deeplabv3_r101b-d8_769x769_80k_cityscapes-20201226_190843.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3 | R-50-D8 | 512x512 | 80000 | 8.9 | 14.76 | 42.42 | 43.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k/deeplabv3_r50-d8_512x512_80k_ade20k_20200614_185028-0bb3f844.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k/deeplabv3_r50-d8_512x512_80k_ade20k_20200614_185028.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 80000 | 12.4 | 10.14 | 44.08 | 45.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k/deeplabv3_r101-d8_512x512_80k_ade20k_20200615_021256-d89c7fa4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k/deeplabv3_r101-d8_512x512_80k_ade20k_20200615_021256.log.json) | +| DeepLabV3 | R-50-D8 | 512x512 | 160000 | - | - | 42.66 | 44.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k/deeplabv3_r50-d8_512x512_160k_ade20k_20200615_123227-5d0ee427.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k/deeplabv3_r50-d8_512x512_160k_ade20k_20200615_123227.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 160000 | - | - | 45.00 | 46.66 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k/deeplabv3_r101-d8_512x512_160k_ade20k_20200615_105816-b1f72b3b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k/deeplabv3_r101-d8_512x512_160k_ade20k_20200615_105816.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3 | R-50-D8 | 512x512 | 20000 | 6.1 | 13.88 | 76.17 | 77.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug/deeplabv3_r50-d8_512x512_20k_voc12aug_20200617_010906-596905ef.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug/deeplabv3_r50-d8_512x512_20k_voc12aug_20200617_010906.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 20000 | 9.6 | 9.81 | 78.70 | 79.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug/deeplabv3_r101-d8_512x512_20k_voc12aug_20200617_010932-8d13832f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug/deeplabv3_r101-d8_512x512_20k_voc12aug_20200617_010932.log.json) | +| DeepLabV3 | R-50-D8 | 512x512 | 40000 | - | - | 77.68 | 78.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug/deeplabv3_r50-d8_512x512_40k_voc12aug_20200613_161546-2ae96e7e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug/deeplabv3_r50-d8_512x512_40k_voc12aug_20200613_161546.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 40000 | - | - | 77.92 | 79.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug/deeplabv3_r101-d8_512x512_40k_voc12aug_20200613_161432-0017d784.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug/deeplabv3_r101-d8_512x512_40k_voc12aug_20200613_161432.log.json) | + +### Pascal Context + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3 | R-101-D8 | 480x480 | 40000 | 9.2 | 7.09 | 46.55 | 47.81 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context/deeplabv3_r101-d8_480x480_40k_pascal_context_20200911_204118-1aa27336.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context/deeplabv3_r101-d8_480x480_40k_pascal_context-20200911_204118.log.json) | +| DeepLabV3 | R-101-D8 | 480x480 | 80000 | - | - | 46.42 | 47.53 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context/deeplabv3_r101-d8_480x480_80k_pascal_context_20200911_170155-2a21fff3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context/deeplabv3_r101-d8_480x480_80k_pascal_context-20200911_170155.log.json) | + +### Pascal Context 59 + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3 | R-101-D8 | 480x480 | 40000 | - | - | 52.61 | 54.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context_59.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context_59/deeplabv3_r101-d8_480x480_40k_pascal_context_59_20210416_110332-cb08ea46.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context_59/deeplabv3_r101-d8_480x480_40k_pascal_context_59-20210416_110332.log.json) | +| DeepLabV3 | R-101-D8 | 480x480 | 80000 | - | - | 52.46 | 54.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context_59.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context_59/deeplabv3_r101-d8_480x480_80k_pascal_context_59_20210416_113002-26303993.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context_59/deeplabv3_r101-d8_480x480_80k_pascal_context_59-20210416_113002.log.json) | + +### COCO-Stuff 10k + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3 | R-50-D8 | 512x512 | 20000 | 9.6 | 10.8 | 34.66 | 36.08 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k_20210821_043025-b35f789d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k_20210821_043025.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 20000 | 13.2 | 8.7 | 37.30 | 38.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k_20210821_043025-c49752cb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k_20210821_043025.log.json) | +| DeepLabV3 | R-50-D8 | 512x512 | 40000 | - | - | 35.73 | 37.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k_20210821_043305-dc76f3ff.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k_20210821_043305.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 40000 | - | - | 37.81 | 38.80 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k_20210821_043305-636cb433.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k_20210821_043305.log.json) | + +### COCO-Stuff 164k + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3 | R-50-D8 | 512x512 | 80000 | 9.6 | 10.8 | 39.38 | 40.03 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k_20210709_163016-88675c24.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k_20210709_163016.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 80000 | 13.2 | 8.7 | 40.87 | 41.50 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k_20210709_201252-13600dc2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k_20210709_201252.log.json) | +| DeepLabV3 | R-50-D8 | 512x512 | 160000 | - | - | 41.09 | 41.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k_20210709_163016-49f2812b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k_20210709_163016.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 160000 | - | - | 41.82 | 42.49 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k_20210709_155402-f035acfd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k_20210709_155402.log.json) | +| DeepLabV3 | R-50-D8 | 
512x512 | 320000 | - | - | 41.37 | 42.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k_20210709_155403-51b21115.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k_20210709_155403.log.json) | +| DeepLabV3 | R-101-D8 | 512x512 | 320000 | - | - | 42.61 | 43.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k_20210709_155402-3cbca14d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k_20210709_155402.log.json) | + +Note: + +- `D-8` here corresponds to the output stride 8 setting for the DeepLab series. +- `FP16` means Mixed Precision (FP16) is adopted in training. diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3.yml new file mode 100644 index 0000000..559af4f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3.yml @@ -0,0 +1,756 @@ +Collections: +- Name: DeepLabV3 + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + - Pascal Context + - Pascal Context 59 + - COCO-Stuff 10k + - COCO-Stuff 164k + Paper: + URL: https://arxiv.org/abs/1706.05587 + Title: Rethinking atrous convolution for semantic image segmentation + README: configs/deeplabv3/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/aspp_head.py#L54 + Version: v0.17.0 + Converted From: + Code: https://github.com/tensorflow/models/tree/master/research/deeplab +Models: +- Name: deeplabv3_r50-d8_512x1024_40k_cityscapes + In Collection: DeepLabV3 + Metadata: + backbone: R-50-D8 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 389.11 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 6.1 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.09 + mIoU(ms+flip): 80.45 + Config: configs/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes/deeplabv3_r50-d8_512x1024_40k_cityscapes_20200605_022449-acadc2f8.pth +- Name: deeplabv3_r101-d8_512x1024_40k_cityscapes + In Collection: DeepLabV3 + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 520.83 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 9.6 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.12 + mIoU(ms+flip): 79.61 + Config: configs/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes.py + Weights:
https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes/deeplabv3_r101-d8_512x1024_40k_cityscapes_20200605_012241-7fd3f799.pth +- Name: deeplabv3_r50-d8_769x769_40k_cityscapes + In Collection: DeepLabV3 + Metadata: + backbone: R-50-D8 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - value: 900.9 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 6.9 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.58 + mIoU(ms+flip): 79.89 + Config: configs/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes/deeplabv3_r50-d8_769x769_40k_cityscapes_20200606_113723-7eda553c.pth +- Name: deeplabv3_r101-d8_769x769_40k_cityscapes + In Collection: DeepLabV3 + Metadata: + backbone: R-101-D8 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - value: 1204.82 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 10.9 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.27 + mIoU(ms+flip): 80.11 + Config: configs/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes/deeplabv3_r101-d8_769x769_40k_cityscapes_20200606_113809-c64f889f.pth +- Name: deeplabv3_r18-d8_512x1024_80k_cityscapes + In Collection: DeepLabV3 + Metadata: + backbone: R-18-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 72.57 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 1.7 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.7 + mIoU(ms+flip): 78.27 + Config: configs/deeplabv3/deeplabv3_r18-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_512x1024_80k_cityscapes/deeplabv3_r18-d8_512x1024_80k_cityscapes_20201225_021506-23dffbe2.pth +- Name: deeplabv3_r50-d8_512x1024_80k_cityscapes + In Collection: DeepLabV3 + Metadata: + backbone: R-50-D8 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.32 + mIoU(ms+flip): 80.57 + Config: configs/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes/deeplabv3_r50-d8_512x1024_80k_cityscapes_20200606_113404-b92cfdd4.pth +- Name: deeplabv3_r101-d8_512x1024_80k_cityscapes + In Collection: DeepLabV3 + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.2 + mIoU(ms+flip): 81.21 + Config: configs/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes/deeplabv3_r101-d8_512x1024_80k_cityscapes_20200606_113503-9e428899.pth +- Name: deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes + In Collection: DeepLabV3 + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 259.07 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP16 + resolution: (512,1024) + Training Memory 
(GB): 5.75 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.48 + Config: configs/deeplabv3/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920-774d9cec.pth +- Name: deeplabv3_r18-d8_769x769_80k_cityscapes + In Collection: DeepLabV3 + Metadata: + backbone: R-18-D8 + crop size: (769,769) + lr schd: 80000 + inference time (ms/im): + - value: 180.18 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 1.9 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.6 + mIoU(ms+flip): 78.26 + Config: configs/deeplabv3/deeplabv3_r18-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18-d8_769x769_80k_cityscapes/deeplabv3_r18-d8_769x769_80k_cityscapes_20201225_021506-6452126a.pth +- Name: deeplabv3_r50-d8_769x769_80k_cityscapes + In Collection: DeepLabV3 + Metadata: + backbone: R-50-D8 + crop size: (769,769) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.89 + mIoU(ms+flip): 81.06 + Config: configs/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes/deeplabv3_r50-d8_769x769_80k_cityscapes_20200606_221338-788d6228.pth +- Name: deeplabv3_r101-d8_769x769_80k_cityscapes + In Collection: DeepLabV3 + Metadata: + backbone: R-101-D8 + crop size: (769,769) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.67 + mIoU(ms+flip): 80.81 + Config: configs/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes/deeplabv3_r101-d8_769x769_80k_cityscapes_20200607_013353-60e95418.pth +- Name: deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes + In Collection: DeepLabV3 + Metadata: + backbone: R-101-D16-MG124 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.36 + mIoU(ms+flip): 79.84 + Config: configs/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes_20200908_005644-57bb8425.pth +- Name: deeplabv3_r18b-d8_512x1024_80k_cityscapes + In Collection: DeepLabV3 + Metadata: + backbone: R-18b-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 71.79 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 1.6 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.26 + mIoU(ms+flip): 77.88 + Config: configs/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes/deeplabv3_r18b-d8_512x1024_80k_cityscapes_20201225_094144-46040cef.pth +- Name: deeplabv3_r50b-d8_512x1024_80k_cityscapes + In Collection: DeepLabV3 + Metadata: + backbone: R-50b-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 364.96 + hardware: V100 + backend: PyTorch + 
batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 6.0 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.63 + mIoU(ms+flip): 80.98 + Config: configs/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes/deeplabv3_r50b-d8_512x1024_80k_cityscapes_20201225_155148-ec368954.pth +- Name: deeplabv3_r101b-d8_512x1024_80k_cityscapes + In Collection: DeepLabV3 + Metadata: + backbone: R-101b-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 552.49 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 9.5 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.01 + mIoU(ms+flip): 81.21 + Config: configs/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes/deeplabv3_r101b-d8_512x1024_80k_cityscapes_20201226_171821-8fd49503.pth +- Name: deeplabv3_r18b-d8_769x769_80k_cityscapes + In Collection: DeepLabV3 + Metadata: + backbone: R-18b-D8 + crop size: (769,769) + lr schd: 80000 + inference time (ms/im): + - value: 172.71 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 1.8 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.63 + mIoU(ms+flip): 77.51 + Config: configs/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes/deeplabv3_r18b-d8_769x769_80k_cityscapes_20201225_094144-fdc985d9.pth +- Name: deeplabv3_r50b-d8_769x769_80k_cityscapes + In Collection: DeepLabV3 + Metadata: + backbone: R-50b-D8 + crop size: (769,769) + lr schd: 80000 + inference time (ms/im): + - value: 862.07 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 6.8 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.8 + mIoU(ms+flip): 80.27 + Config: configs/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes/deeplabv3_r50b-d8_769x769_80k_cityscapes_20201225_155404-87fb0cf4.pth +- Name: deeplabv3_r101b-d8_769x769_80k_cityscapes + In Collection: DeepLabV3 + Metadata: + backbone: R-101b-D8 + crop size: (769,769) + lr schd: 80000 + inference time (ms/im): + - value: 1219.51 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 10.7 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.41 + mIoU(ms+flip): 80.73 + Config: configs/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes/deeplabv3_r101b-d8_769x769_80k_cityscapes_20201226_190843-9142ee57.pth +- Name: deeplabv3_r50-d8_512x512_80k_ade20k + In Collection: DeepLabV3 + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 67.75 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 8.9 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + 
Metrics: + mIoU: 42.42 + mIoU(ms+flip): 43.28 + Config: configs/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k/deeplabv3_r50-d8_512x512_80k_ade20k_20200614_185028-0bb3f844.pth +- Name: deeplabv3_r101-d8_512x512_80k_ade20k + In Collection: DeepLabV3 + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 98.62 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 12.4 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 44.08 + mIoU(ms+flip): 45.19 + Config: configs/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k/deeplabv3_r101-d8_512x512_80k_ade20k_20200615_021256-d89c7fa4.pth +- Name: deeplabv3_r50-d8_512x512_160k_ade20k + In Collection: DeepLabV3 + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.66 + mIoU(ms+flip): 44.09 + Config: configs/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k/deeplabv3_r50-d8_512x512_160k_ade20k_20200615_123227-5d0ee427.pth +- Name: deeplabv3_r101-d8_512x512_160k_ade20k + In Collection: DeepLabV3 + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.0 + mIoU(ms+flip): 46.66 + Config: configs/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k/deeplabv3_r101-d8_512x512_160k_ade20k_20200615_105816-b1f72b3b.pth +- Name: deeplabv3_r50-d8_512x512_20k_voc12aug + In Collection: DeepLabV3 + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 72.05 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 6.1 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.17 + mIoU(ms+flip): 77.42 + Config: configs/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug/deeplabv3_r50-d8_512x512_20k_voc12aug_20200617_010906-596905ef.pth +- Name: deeplabv3_r101-d8_512x512_20k_voc12aug + In Collection: DeepLabV3 + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 101.94 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.6 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 78.7 + mIoU(ms+flip): 79.95 + Config: configs/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug/deeplabv3_r101-d8_512x512_20k_voc12aug_20200617_010932-8d13832f.pth +- Name: deeplabv3_r50-d8_512x512_40k_voc12aug + In Collection: DeepLabV3 + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.68 + 
mIoU(ms+flip): 78.78 + Config: configs/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug/deeplabv3_r50-d8_512x512_40k_voc12aug_20200613_161546-2ae96e7e.pth +- Name: deeplabv3_r101-d8_512x512_40k_voc12aug + In Collection: DeepLabV3 + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.92 + mIoU(ms+flip): 79.18 + Config: configs/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug/deeplabv3_r101-d8_512x512_40k_voc12aug_20200613_161432-0017d784.pth +- Name: deeplabv3_r101-d8_480x480_40k_pascal_context + In Collection: DeepLabV3 + Metadata: + backbone: R-101-D8 + crop size: (480,480) + lr schd: 40000 + inference time (ms/im): + - value: 141.04 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (480,480) + Training Memory (GB): 9.2 + Results: + - Task: Semantic Segmentation + Dataset: Pascal Context + Metrics: + mIoU: 46.55 + mIoU(ms+flip): 47.81 + Config: configs/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context/deeplabv3_r101-d8_480x480_40k_pascal_context_20200911_204118-1aa27336.pth +- Name: deeplabv3_r101-d8_480x480_80k_pascal_context + In Collection: DeepLabV3 + Metadata: + backbone: R-101-D8 + crop size: (480,480) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal Context + Metrics: + mIoU: 46.42 + mIoU(ms+flip): 47.53 + Config: configs/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context/deeplabv3_r101-d8_480x480_80k_pascal_context_20200911_170155-2a21fff3.pth +- Name: deeplabv3_r101-d8_480x480_40k_pascal_context_59 + In Collection: DeepLabV3 + Metadata: + backbone: R-101-D8 + crop size: (480,480) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal Context 59 + Metrics: + mIoU: 52.61 + mIoU(ms+flip): 54.28 + Config: configs/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context_59.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context_59/deeplabv3_r101-d8_480x480_40k_pascal_context_59_20210416_110332-cb08ea46.pth +- Name: deeplabv3_r101-d8_480x480_80k_pascal_context_59 + In Collection: DeepLabV3 + Metadata: + backbone: R-101-D8 + crop size: (480,480) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal Context 59 + Metrics: + mIoU: 52.46 + mIoU(ms+flip): 54.09 + Config: configs/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context_59.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context_59/deeplabv3_r101-d8_480x480_80k_pascal_context_59_20210416_113002-26303993.pth +- Name: deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k + In Collection: DeepLabV3 + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 92.59 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.6 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 10k + Metrics: + mIoU: 34.66 + 
mIoU(ms+flip): 36.08 + Config: configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k_20210821_043025-b35f789d.pth +- Name: deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k + In Collection: DeepLabV3 + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 114.94 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 13.2 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 10k + Metrics: + mIoU: 37.3 + mIoU(ms+flip): 38.42 + Config: configs/deeplabv3/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k_20210821_043025-c49752cb.pth +- Name: deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k + In Collection: DeepLabV3 + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 10k + Metrics: + mIoU: 35.73 + mIoU(ms+flip): 37.09 + Config: configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k_20210821_043305-dc76f3ff.pth +- Name: deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k + In Collection: DeepLabV3 + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 10k + Metrics: + mIoU: 37.81 + mIoU(ms+flip): 38.8 + Config: configs/deeplabv3/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k_20210821_043305-636cb433.pth +- Name: deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k + In Collection: DeepLabV3 + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 92.59 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.6 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 39.38 + mIoU(ms+flip): 40.03 + Config: configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k_20210709_163016-88675c24.pth +- Name: deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k + In Collection: DeepLabV3 + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 114.94 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 13.2 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 40.87 + mIoU(ms+flip): 41.5 + Config: configs/deeplabv3/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k_20210709_201252-13600dc2.pth +- Name: 
deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k + In Collection: DeepLabV3 + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 41.09 + mIoU(ms+flip): 41.69 + Config: configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k_20210709_163016-49f2812b.pth +- Name: deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k + In Collection: DeepLabV3 + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 41.82 + mIoU(ms+flip): 42.49 + Config: configs/deeplabv3/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k_20210709_155402-f035acfd.pth +- Name: deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k + In Collection: DeepLabV3 + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 320000 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 41.37 + mIoU(ms+flip): 42.22 + Config: configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k_20210709_155403-51b21115.pth +- Name: deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k + In Collection: DeepLabV3 + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 320000 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 42.61 + mIoU(ms+flip): 43.42 + Config: configs/deeplabv3/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k_20210709_155402-3cbca14d.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..f20f260 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_40k_cityscapes.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3_r50-d8_512x1024_40k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + depth=101, + dilations=(1, 1, 1, 2), + strides=(1, 2, 2, 1), + multi_grid=(1, 2, 4)), + decode_head=dict( + dilations=(1, 6, 12, 18), + sampler=dict(type='OHEMPixelSampler', min_kept=100000))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..de4a8a5 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d16-mg124_512x1024_80k_cityscapes.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + 
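+# (editor's note) Same backbone and head tweaks as the 40k variant above:
+# multi_grid=(1, 2, 4) multiplies the dilation of the three stage-4 ResNet
+# blocks, strides=(1, 2, 2, 1) keep the output stride at 16 (the "D16" in the
+# name), and OHEMPixelSampler restricts the decode-head loss to hard pixels,
+# keeping at least min_kept=100000 pixels per image. +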
pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + depth=101, + dilations=(1, 1, 1, 2), + strides=(1, 2, 2, 1), + multi_grid=(1, 2, 4)), + decode_head=dict( + dilations=(1, 6, 12, 18), + sampler=dict(type='OHEMPixelSampler', min_kept=100000))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context.py new file mode 100644 index 0000000..0b5256f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_480x480_40k_pascal_context.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context_59.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context_59.py new file mode 100644 index 0000000..4874121 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_480x480_40k_pascal_context_59.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_480x480_40k_pascal_context_59.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000..001b7a6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_480x480_80k_pascal_context.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context_59.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context_59.py new file mode 100644 index 0000000..032dc8b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_480x480_80k_pascal_context_59.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_480x480_80k_pascal_context_59.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..8c707c7 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..6804a57 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes.py 
@@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..df6f36e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000..40f5f62 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000..fb2be22 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k.py new file mode 100644 index 0000000..76b1242 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x512_4x4_160k_coco-stuff164k.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k.py new file mode 100644 index 0000000..d476c66 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x512_4x4_20k_coco-stuff10k.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k.py new file mode 100644 index 0000000..50669c8 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x512_4x4_320k_coco-stuff164k.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k.py' +model = 
dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k.py new file mode 100644 index 0000000..37d09cf --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x512_4x4_40k_coco-stuff10k.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k.py new file mode 100644 index 0000000..a0eb3dd --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x512_4x4_80k_coco-stuff164k.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..796ba3f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000..e6d58a6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..13094a9 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..e326109 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101-d8_fp16_512x1024_80k_cityscapes.py @@ -0,0 +1,5 @@ +_base_ = './deeplabv3_r101-d8_512x1024_80k_cityscapes.py' +# fp16 settings +optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.) 
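+# (editor's note) Fp16OptimizerHook statically scales the loss by 512 before
+# backward so small fp16 gradients do not underflow, then unscales them again
+# before the optimizer step; the empty `fp16` dict below is the flag mmseg
+# checks to run training in half precision.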
+# fp16 placeholder +fp16 = dict() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..5186bf6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = './deeplabv3_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..d185db9 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r101b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = './deeplabv3_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r18-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r18-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..e084e95 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r18-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './deeplabv3_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r18-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r18-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..a990c07 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r18-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './deeplabv3_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..b25e725 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r18b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './deeplabv3_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..fd920f0 --- /dev/null +++ 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r18b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './deeplabv3_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_480x480_40k_pascal_context.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_480x480_40k_pascal_context.py new file mode 100644 index 0000000..9d493ef --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_480x480_40k_pascal_context.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=60), + auxiliary_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_480x480_40k_pascal_context_59.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_480x480_40k_pascal_context_59.py new file mode 100644 index 0000000..038993c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_480x480_40k_pascal_context_59.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=59), + auxiliary_head=dict(num_classes=59), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_480x480_80k_pascal_context.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000..71a0fda --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_480x480_80k_pascal_context.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=60), + auxiliary_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_480x480_80k_pascal_context_59.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_480x480_80k_pascal_context_59.py new file mode 100644 index 0000000..bcdc0b4 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_480x480_80k_pascal_context_59.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( 
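+    # (editor's note) 59 foreground classes: the Pascal Context 59 variant
+    # drops the background label that the 60-class pascal_context configs
+    # above include.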
+ decode_head=dict(num_classes=59), + auxiliary_head=dict(num_classes=59), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..8e7420d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..132787d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..b4a9d4e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000..f62da1a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000..492bd3d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k.py 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k.py new file mode 100644 index 0000000..22d647e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_160k_coco-stuff164k.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=171), auxiliary_head=dict(num_classes=171)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k.py new file mode 100644 index 0000000..45e0b56 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_20k_coco-stuff10k.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/coco-stuff10k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=171), auxiliary_head=dict(num_classes=171)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k.py new file mode 100644 index 0000000..3e43234 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_320k_coco-stuff164k.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_320k.py' +] +model = dict( + decode_head=dict(num_classes=171), auxiliary_head=dict(num_classes=171)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k.py new file mode 100644 index 0000000..f02772a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_40k_coco-stuff10k.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/coco-stuff10k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=171), auxiliary_head=dict(num_classes=171)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k.py new file mode 100644 index 0000000..8697e92 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x512_4x4_80k_coco-stuff164k.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=171), auxiliary_head=dict(num_classes=171)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k.py 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..78f4d0d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000..e35d198 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..dd7c165 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/deeplabv3_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..e742d9a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..332d9cf --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3/deeplabv3_r50b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/README.md new file mode 100644 index 0000000..86b8bfb --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/README.md @@ -0,0 +1,132 @@ +# DeepLabV3+ + 
+[Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1802.02611) + +## Introduction + + + +[Official Repo](https://github.com/tensorflow/models/tree/master/research/deeplab) + +Code Snippet + +## Abstract + + + +Spatial pyramid pooling modules or encoder-decoder structures are used in deep neural networks for the semantic segmentation task. The former networks are able to encode multi-scale contextual information by probing the incoming features with filters or pooling operations at multiple rates and multiple effective fields-of-view, while the latter networks can capture sharper object boundaries by gradually recovering the spatial information. In this work, we propose to combine the advantages of both methods. Specifically, our proposed model, DeepLabv3+, extends DeepLabv3 by adding a simple yet effective decoder module to refine the segmentation results, especially along object boundaries. We further explore the Xception model and apply depthwise separable convolution to both the Atrous Spatial Pyramid Pooling and decoder modules, resulting in a faster and stronger encoder-decoder network. We demonstrate the effectiveness of the proposed model on the PASCAL VOC 2012 and Cityscapes datasets, achieving test set performance of 89.0% and 82.1%, respectively, without any post-processing. Our paper is accompanied by a publicly available reference implementation of the proposed models in TensorFlow at [this https URL](https://github.com/tensorflow/models/tree/master/research/deeplab). + +
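To make the abstract concrete, here is a minimal, self-contained PyTorch sketch of the two ingredients DeepLabv3+ adds on top of DeepLabv3: depthwise separable (optionally atrous) convolution, and a light decoder that fuses the upsampled ASPP output with projected low-level backbone features. This is an illustration only, not the mmsegmentation implementation; the class names, channel widths, and shapes are assumptions chosen to loosely mirror an R-50-D8 setup.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F


class SeparableConv2d(nn.Module):
    """Depthwise separable 3x3 conv; `dilation` > 1 makes it atrous."""

    def __init__(self, in_ch, out_ch, dilation=1):
        super().__init__()
        self.depthwise = nn.Conv2d(in_ch, in_ch, 3, padding=dilation,
                                   dilation=dilation, groups=in_ch, bias=False)
        self.pointwise = nn.Conv2d(in_ch, out_ch, 1, bias=False)
        self.bn = nn.BatchNorm2d(out_ch)

    def forward(self, x):
        return F.relu(self.bn(self.pointwise(self.depthwise(x))))


class ToyDecoder(nn.Module):
    """DeepLabv3+-style decoder: upsample the ASPP output to the low-level
    feature resolution, concatenate with a 1x1-projected copy of the
    low-level features, refine with a separable conv, then classify."""

    def __init__(self, aspp_ch=256, low_ch=256, num_classes=19):
        super().__init__()
        self.low_proj = nn.Conv2d(low_ch, 48, 1, bias=False)  # shrink low-level channels
        self.refine = SeparableConv2d(aspp_ch + 48, 256)
        self.cls = nn.Conv2d(256, num_classes, 1)

    def forward(self, aspp_out, low_level):
        aspp_up = F.interpolate(aspp_out, size=low_level.shape[2:],
                                mode='bilinear', align_corners=False)
        x = torch.cat([aspp_up, self.low_proj(low_level)], dim=1)
        return self.cls(self.refine(x))


# Shapes loosely mirror an R-50-D8 backbone on a 512x1024 crop:
# ASPP output at stride 8, low-level (stage-1) features at stride 4.
logits = ToyDecoder()(torch.randn(1, 256, 64, 128), torch.randn(1, 256, 128, 256))
print(logits.shape)  # torch.Size([1, 19, 128, 256])
```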
+ +## Citation + +```bibtex +@inproceedings{deeplabv3plus2018, + title={Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation}, + author={Liang-Chieh Chen and Yukun Zhu and George Papandreou and Florian Schroff and Hartwig Adam}, + booktitle={ECCV}, + year={2018} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ----------------- | --------------- | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| DeepLabV3+ | R-50-D8 | 512x1024 | 40000 | 7.5 | 3.94 | 79.61 | 81.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes/deeplabv3plus_r50-d8_512x1024_40k_cityscapes_20200605_094610-d222ffcd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes/deeplabv3plus_r50-d8_512x1024_40k_cityscapes_20200605_094610.log.json) | +| DeepLabV3+ | R-101-D8 | 512x1024 | 40000 | 11 | 2.60 | 80.21 | 81.82 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes/deeplabv3plus_r101-d8_512x1024_40k_cityscapes_20200605_094614-3769eecf.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes/deeplabv3plus_r101-d8_512x1024_40k_cityscapes_20200605_094614.log.json) | +| DeepLabV3+ | R-50-D8 | 769x769 | 40000 | 8.5 | 1.72 | 78.97 | 80.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes/deeplabv3plus_r50-d8_769x769_40k_cityscapes_20200606_114143-1dcb0e3c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes/deeplabv3plus_r50-d8_769x769_40k_cityscapes_20200606_114143.log.json) | +| DeepLabV3+ | R-101-D8 | 769x769 | 40000 | 12.5 | 1.15 | 79.46 | 80.50 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes/deeplabv3plus_r101-d8_769x769_40k_cityscapes_20200606_114304-ff414b9e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes/deeplabv3plus_r101-d8_769x769_40k_cityscapes_20200606_114304.log.json) | +| DeepLabV3+ 
| R-18-D8 | 512x1024 | 80000 | 2.2 | 14.27 | 76.89 | 78.76 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes/deeplabv3plus_r18-d8_512x1024_80k_cityscapes_20201226_080942-cff257fe.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes/deeplabv3plus_r18-d8_512x1024_80k_cityscapes-20201226_080942.log.json) | +| DeepLabV3+ | R-50-D8 | 512x1024 | 80000 | - | - | 80.09 | 81.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes/deeplabv3plus_r50-d8_512x1024_80k_cityscapes_20200606_114049-f9fb496d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes/deeplabv3plus_r50-d8_512x1024_80k_cityscapes_20200606_114049.log.json) | +| DeepLabV3+ | R-101-D8 | 512x1024 | 80000 | - | - | 80.97 | 82.03 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_cityscapes_20200606_114143-068fcfe9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_cityscapes_20200606_114143.log.json) | +| DeepLabV3+ (FP16) | R-101-D8 | 512x1024 | 80000 | 6.35 | 7.87 | 80.46 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920-f1104f4b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920.log.json) | +| DeepLabV3+ | R-18-D8 | 769x769 | 80000 | 2.5 | 5.74 | 76.26 | 77.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r18-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_769x769_80k_cityscapes/deeplabv3plus_r18-d8_769x769_80k_cityscapes_20201226_083346-f326e06a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_769x769_80k_cityscapes/deeplabv3plus_r18-d8_769x769_80k_cityscapes-20201226_083346.log.json) | +| DeepLabV3+ | R-50-D8 | 769x769 | 80000 | - | - | 79.83 | 81.48 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes/deeplabv3plus_r50-d8_769x769_80k_cityscapes_20200606_210233-0e9dfdc4.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes/deeplabv3plus_r50-d8_769x769_80k_cityscapes_20200606_210233.log.json) | +| DeepLabV3+ | R-101-D8 | 769x769 | 80000 | - | - | 80.65 | 81.47 | [config\[1\]](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes/deeplabv3plus_r101-d8_769x769_80k_cityscapes_20220406_154720-dfcc0b68.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes/deeplabv3plus_r101-d8_769x769_80k_cityscapes_20220406_154720.log.json) | +| DeepLabV3+ | R-101-D16-MG124 | 512x1024 | 40000 | 5.8 | 7.48 | 79.09 | 80.36 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes_20200908_005644-cf9ce186.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes-20200908_005644.log.json) | +| DeepLabV3+ | R-101-D16-MG124 | 512x1024 | 80000 | 9.9 | - | 79.90 | 81.33 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes_20200908_005644-ee6158e0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes-20200908_005644.log.json) | +| DeepLabV3+ | R-18b-D8 | 512x1024 | 80000 | 2.1 | 14.95 | 75.87 | 77.52 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes_20201226_090828-e451abd9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes-20201226_090828.log.json) | +| DeepLabV3+ | R-50b-D8 | 512x1024 | 80000 | 7.4 | 3.94 | 80.28 | 81.44 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes_20201225_213645-a97e4e43.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes-20201225_213645.log.json) | +| DeepLabV3+ | R-101b-D8 | 512x1024 | 80000 | 10.9 | 2.60 | 80.16 | 81.41 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes_20201226_190843-9c3c93a4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes-20201226_190843.log.json) | +| DeepLabV3+ | R-18b-D8 | 769x769 | 80000 | 2.4 | 5.96 | 76.36 | 78.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r18b-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_769x769_80k_cityscapes/deeplabv3plus_r18b-d8_769x769_80k_cityscapes_20201226_151312-2c868aff.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_769x769_80k_cityscapes/deeplabv3plus_r18b-d8_769x769_80k_cityscapes-20201226_151312.log.json) | +| DeepLabV3+ | R-50b-D8 | 769x769 | 80000 | 8.4 | 1.72 | 79.41 | 80.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r50b-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_769x769_80k_cityscapes/deeplabv3plus_r50b-d8_769x769_80k_cityscapes_20201225_224655-8b596d1c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_769x769_80k_cityscapes/deeplabv3plus_r50b-d8_769x769_80k_cityscapes-20201225_224655.log.json) | +| DeepLabV3+ | R-101b-D8 | 769x769 | 80000 | 12.3 | 1.10 | 79.88 | 81.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101b-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_769x769_80k_cityscapes/deeplabv3plus_r101b-d8_769x769_80k_cityscapes_20201226_205041-227cdf7c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_769x769_80k_cityscapes/deeplabv3plus_r101b-d8_769x769_80k_cityscapes-20201226_205041.log.json) | + +\[1\] The training of the model is sensitive to random seed, and the seed to train it is 1111. 
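Each row above pairs a training config with downloadable weights, and the pair plugs straight into the Python API of mmsegmentation v0.x, the version these configs target. A minimal inference sketch, assuming mmsegmentation v0.x is installed, the checkpoint from the first Cityscapes row has been downloaded into the working directory, and `demo.png` stands in for any test image:

```python
from mmseg.apis import init_segmentor, inference_segmentor

# Config and checkpoint taken from the R-50-D8, 512x1024, 40k-iteration row above.
config_file = 'configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py'
checkpoint_file = 'deeplabv3plus_r50-d8_512x1024_40k_cityscapes_20200605_094610-d222ffcd.pth'

model = init_segmentor(config_file, checkpoint_file, device='cuda:0')

# Returns one per-pixel class-id array per input image.
result = inference_segmentor(model, 'demo.png')

# Overlay the prediction on the input image and save it.
model.show_result('demo.png', result, out_file='demo_seg.png', opacity=0.5)
```

Any other config/weights pair from these tables works the same way; only the two paths change.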
+ +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3+ | R-50-D8 | 512x512 | 80000 | 10.6 | 21.01 | 42.72 | 43.75 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k/deeplabv3plus_r50-d8_512x512_80k_ade20k_20200614_185028-bf1400d8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k/deeplabv3plus_r50-d8_512x512_80k_ade20k_20200614_185028.log.json) | +| DeepLabV3+ | R-101-D8 | 512x512 | 80000 | 14.1 | 14.16 | 44.60 | 46.06 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k/deeplabv3plus_r101-d8_512x512_80k_ade20k_20200615_014139-d5730af7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k/deeplabv3plus_r101-d8_512x512_80k_ade20k_20200615_014139.log.json) | +| DeepLabV3+ | R-50-D8 | 512x512 | 160000 | - | - | 43.95 | 44.93 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k/deeplabv3plus_r50-d8_512x512_160k_ade20k_20200615_124504-6135c7e0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k/deeplabv3plus_r50-d8_512x512_160k_ade20k_20200615_124504.log.json) | +| DeepLabV3+ | R-101-D8 | 512x512 | 160000 | - | - | 45.47 | 46.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232-38ed86bb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------------------- | 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3+ | R-50-D8 | 512x512 | 20000 | 7.6 | 21 | 75.93 | 77.50 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug/deeplabv3plus_r50-d8_512x512_20k_voc12aug_20200617_102323-aad58ef1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug/deeplabv3plus_r50-d8_512x512_20k_voc12aug_20200617_102323.log.json) | +| DeepLabV3+ | R-101-D8 | 512x512 | 20000 | 11 | 13.88 | 77.22 | 78.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug/deeplabv3plus_r101-d8_512x512_20k_voc12aug_20200617_102345-c7ff3d56.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug/deeplabv3plus_r101-d8_512x512_20k_voc12aug_20200617_102345.log.json) | +| DeepLabV3+ | R-50-D8 | 512x512 | 40000 | - | - | 76.81 | 77.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug/deeplabv3plus_r50-d8_512x512_40k_voc12aug_20200613_161759-e1b43aa9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug/deeplabv3plus_r50-d8_512x512_40k_voc12aug_20200613_161759.log.json) | +| DeepLabV3+ | R-101-D8 | 512x512 | 40000 | - | - | 78.62 | 79.53 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug/deeplabv3plus_r101-d8_512x512_40k_voc12aug_20200613_205333-faf03387.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug/deeplabv3plus_r101-d8_512x512_40k_voc12aug_20200613_205333.log.json) | + +### Pascal Context + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3+ | R-101-D8 | 480x480 | 40000 | - | 9.09 | 47.30 | 
48.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context/deeplabv3plus_r101-d8_480x480_40k_pascal_context_20200911_165459-d3c8a29e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context/deeplabv3plus_r101-d8_480x480_40k_pascal_context-20200911_165459.log.json) | +| DeepLabV3+ | R-101-D8 | 480x480 | 80000 | - | - | 47.23 | 48.26 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context/deeplabv3plus_r101-d8_480x480_80k_pascal_context_20200911_155322-145d3ee8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context/deeplabv3plus_r101-d8_480x480_80k_pascal_context-20200911_155322.log.json) | + +### Pascal Context 59 + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3+ | R-101-D8 | 480x480 | 40000 | - | - | 52.86 | 54.54 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59_20210416_111233-ed937f15.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59-20210416_111233.log.json) | +| DeepLabV3+ | R-101-D8 | 480x480 | 80000 | - | - | 53.2 | 54.67 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59_20210416_111127-7ca0331d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59-20210416_111127.log.json) | + +### LoveDA + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | 
------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| DeepLabV3+ | R-18-D8 | 512x512 | 80000 | 1.93 | 25.57 | 50.28 | 50.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_loveda.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_loveda/deeplabv3plus_r18-d8_512x512_80k_loveda_20211104_132800-ce0fa0ca.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_loveda/deeplabv3plus_r18-d8_512x512_80k_loveda_20211104_132800.log.json) | +| DeepLabV3+ | R-50-D8 | 512x512 | 80000 | 7.37 | 6.00 | 50.99 | 50.65 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_loveda.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_loveda/deeplabv3plus_r50-d8_512x512_80k_loveda_20211105_080442-f0720392.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_loveda/deeplabv3plus_r50-d8_512x512_80k_loveda_20211105_080442.log.json) | +| DeepLabV3+ | R-101-D8 | 512x512 | 80000 | 10.84 | 4.33 | 51.47 | 51.32 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_loveda.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_loveda/deeplabv3plus_r101-d8_512x512_80k_loveda_20211105_110759-4c1f297e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_loveda/deeplabv3plus_r101-d8_512x512_80k_loveda_20211105_110759.log.json) | + +### Potsdam + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3+ | R-18-D8 | 512x512 | 80000 | 1.91 | 81.68 | 77.09 | 78.44 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_potsdam.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_potsdam/deeplabv3plus_r18-d8_512x512_80k_potsdam_20211219_020601-75fd5bc3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_potsdam/deeplabv3plus_r18-d8_512x512_80k_potsdam_20211219_020601.log.json) | 
+| DeepLabV3+ | R-50-D8 | 512x512 | 80000 | 7.36 | 26.44 | 78.33 | 79.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_potsdam.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_potsdam/deeplabv3plus_r50-d8_512x512_80k_potsdam_20211219_031508-7e7a2b24.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_potsdam/deeplabv3plus_r50-d8_512x512_80k_potsdam_20211219_031508.log.json) | +| DeepLabV3+ | R-101-D8 | 512x512 | 80000 | 10.83 | 17.56 | 78.7 | 79.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_potsdam.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_potsdam/deeplabv3plus_r101-d8_512x512_80k_potsdam_20211219_031508-8b112708.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_potsdam/deeplabv3plus_r101-d8_512x512_80k_potsdam_20211219_031508.log.json) | + +### Vaihingen + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DeepLabV3+ | R-18-D8 | 512x512 | 80000 | 1.91 | 72.79 | 72.50 | 74.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen_20211231_230805-7626a263.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen_20211231_230805.log.json) | +| DeepLabV3+ | R-50-D8 | 512x512 | 80000 | 7.36 | 26.91 | 73.97 | 75.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen_20211231_230816-5040938d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen_20211231_230816.log.json) | +| DeepLabV3+ | R-101-D8 | 512x512 | 80000 | 10.83 | 18.59 | 73.06 | 74.14 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen_20211231_230816-8a095afa.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen_20211231_230816.log.json) |
+
+### iSAID
+
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download |
+| ---------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------ | -------- |
+| DeepLabV3+ | R-18-D8 | 896x896 | 80000 | 6.19 | 24.81 | 61.35 | 62.61 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid_20220110_180526-7059991d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid_20220110_180526.log.json) |
+| DeepLabV3+ | R-50-D8 | 896x896 | 80000 | 21.45 | 8.42 | 67.06 | 68.02 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid_20220110_180526-598be439.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid_20220110_180526.log.json) |
+
+Note:
+
+- `D-8`/`D-16` here correspond to the output stride 8/16 setting for the DeepLab series.
+- `MG-124` stands for multi-grid dilation in the last stage of ResNet.
+- `FP16` means Mixed Precision (FP16) is adopted during training.
+- `896x896` is the Crop Size of the iSAID dataset, following the implementation of [PointFlow: Flowing Semantics Through Points for Aerial Image Segmentation](https://arxiv.org/pdf/2103.06564.pdf)
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus.yml
new file mode 100644
index 0000000..56790c8
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus.yml
@@ -0,0 +1,850 @@
+Collections:
+- Name: DeepLabV3+
+  Metadata:
+    Training Data:
+    - Cityscapes
+    - ADE20K
+    - Pascal VOC 2012 + Aug
+    - Pascal Context
+    - Pascal Context 59
+    - LoveDA
+    - Potsdam
+    - Vaihingen
+    - iSAID
+  Paper:
+    URL: https://arxiv.org/abs/1802.02611
+    Title: Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation
+  README: configs/deeplabv3plus/README.md
+  Code:
+    URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/sep_aspp_head.py#L30
+    Version: v0.17.0
+  Converted From:
+    Code: https://github.com/tensorflow/models/tree/master/research/deeplab
+Models:
+- Name: deeplabv3plus_r50-d8_512x1024_40k_cityscapes
+  In Collection: DeepLabV3+
+  Metadata:
+    backbone: R-50-D8
+    crop size: (512,1024)
+    lr schd: 40000
+    inference time (ms/im):
+    - value: 253.81
+      hardware: V100
+      backend: PyTorch
+      batch size: 1
+      mode: FP32
+      resolution: (512,1024)
+    Training Memory (GB): 7.5
+  Results:
+  - Task: Semantic Segmentation
+    Dataset: Cityscapes
+    Metrics:
+      mIoU: 79.61
+      mIoU(ms+flip): 81.01
+  Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py
+  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes/deeplabv3plus_r50-d8_512x1024_40k_cityscapes_20200605_094610-d222ffcd.pth
+- Name: deeplabv3plus_r101-d8_512x1024_40k_cityscapes
+  In Collection: DeepLabV3+
+  Metadata:
+    backbone: R-101-D8
+    crop size: (512,1024)
+    lr schd: 40000
+    inference time (ms/im):
+    - value: 384.62
+      hardware: V100
+      backend: PyTorch
+      batch size: 1
+      mode: FP32
+      resolution: (512,1024)
+    Training Memory (GB): 11.0
+  Results:
+  - Task: Semantic Segmentation
+    Dataset: Cityscapes
+    Metrics:
+      mIoU: 80.21
+      mIoU(ms+flip): 81.82
+  Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes.py
+  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes/deeplabv3plus_r101-d8_512x1024_40k_cityscapes_20200605_094614-3769eecf.pth
+- Name: deeplabv3plus_r50-d8_769x769_40k_cityscapes
+  In Collection: DeepLabV3+
+  Metadata:
+    backbone: R-50-D8
+    crop size: (769,769)
+    lr schd: 40000
+    inference time (ms/im):
+    - value: 581.4
+      hardware: V100
+      backend: PyTorch
+      batch size: 1
+      mode: FP32
+      resolution: (769,769)
+    Training Memory (GB): 8.5
+  Results:
+  - Task: Semantic Segmentation
+    Dataset: Cityscapes
+    Metrics:
+      mIoU: 78.97
+      mIoU(ms+flip): 80.46
+  Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes.py
+  Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes/deeplabv3plus_r50-d8_769x769_40k_cityscapes_20200606_114143-1dcb0e3c.pth
+- Name: deeplabv3plus_r101-d8_769x769_40k_cityscapes
+  In Collection: DeepLabV3+
+  Metadata:
+    backbone: R-101-D8
+    crop size: (769,769)
+    lr schd: 40000
+    inference time (ms/im):
+    - value: 869.57
+      hardware: V100
+      backend: PyTorch
+      batch size: 1
mode: FP32 + resolution: (769,769) + Training Memory (GB): 12.5 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.46 + mIoU(ms+flip): 80.5 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes/deeplabv3plus_r101-d8_769x769_40k_cityscapes_20200606_114304-ff414b9e.pth +- Name: deeplabv3plus_r18-d8_512x1024_80k_cityscapes + In Collection: DeepLabV3+ + Metadata: + backbone: R-18-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 70.08 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 2.2 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.89 + mIoU(ms+flip): 78.76 + Config: configs/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes/deeplabv3plus_r18-d8_512x1024_80k_cityscapes_20201226_080942-cff257fe.pth +- Name: deeplabv3plus_r50-d8_512x1024_80k_cityscapes + In Collection: DeepLabV3+ + Metadata: + backbone: R-50-D8 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.09 + mIoU(ms+flip): 81.13 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes/deeplabv3plus_r50-d8_512x1024_80k_cityscapes_20200606_114049-f9fb496d.pth +- Name: deeplabv3plus_r101-d8_512x1024_80k_cityscapes + In Collection: DeepLabV3+ + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.97 + mIoU(ms+flip): 82.03 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_512x1024_80k_cityscapes_20200606_114143-068fcfe9.pth +- Name: deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes + In Collection: DeepLabV3+ + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 127.06 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP16 + resolution: (512,1024) + Training Memory (GB): 6.35 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.46 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230920-f1104f4b.pth +- Name: deeplabv3plus_r18-d8_769x769_80k_cityscapes + In Collection: DeepLabV3+ + Metadata: + backbone: R-18-D8 + crop size: (769,769) + lr schd: 80000 + inference time (ms/im): + - value: 174.22 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 2.5 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.26 + mIoU(ms+flip): 77.91 + Config: configs/deeplabv3plus/deeplabv3plus_r18-d8_769x769_80k_cityscapes.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_769x769_80k_cityscapes/deeplabv3plus_r18-d8_769x769_80k_cityscapes_20201226_083346-f326e06a.pth +- Name: deeplabv3plus_r50-d8_769x769_80k_cityscapes + In Collection: DeepLabV3+ + Metadata: + backbone: R-50-D8 + crop size: (769,769) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.83 + mIoU(ms+flip): 81.48 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes/deeplabv3plus_r50-d8_769x769_80k_cityscapes_20200606_210233-0e9dfdc4.pth +- Name: deeplabv3plus_r101-d8_769x769_80k_cityscapes + In Collection: DeepLabV3+ + Metadata: + backbone: R-101-D8 + crop size: (769,769) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.65 + mIoU(ms+flip): 81.47 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes/deeplabv3plus_r101-d8_769x769_80k_cityscapes_20220406_154720-dfcc0b68.pth +- Name: deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes + In Collection: DeepLabV3+ + Metadata: + backbone: R-101-D16-MG124 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 133.69 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 5.8 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.09 + mIoU(ms+flip): 80.36 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes_20200908_005644-cf9ce186.pth +- Name: deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes + In Collection: DeepLabV3+ + Metadata: + backbone: R-101-D16-MG124 + crop size: (512,1024) + lr schd: 80000 + Training Memory (GB): 9.9 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.9 + mIoU(ms+flip): 81.33 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes_20200908_005644-ee6158e0.pth +- Name: deeplabv3plus_r18b-d8_512x1024_80k_cityscapes + In Collection: DeepLabV3+ + Metadata: + backbone: R-18b-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 66.89 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 2.1 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.87 + mIoU(ms+flip): 77.52 + Config: configs/deeplabv3plus/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes_20201226_090828-e451abd9.pth +- Name: deeplabv3plus_r50b-d8_512x1024_80k_cityscapes + In Collection: DeepLabV3+ + Metadata: + backbone: R-50b-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 253.81 + hardware: V100 + 
backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 7.4 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.28 + mIoU(ms+flip): 81.44 + Config: configs/deeplabv3plus/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes_20201225_213645-a97e4e43.pth +- Name: deeplabv3plus_r101b-d8_512x1024_80k_cityscapes + In Collection: DeepLabV3+ + Metadata: + backbone: R-101b-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 384.62 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 10.9 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.16 + mIoU(ms+flip): 81.41 + Config: configs/deeplabv3plus/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes_20201226_190843-9c3c93a4.pth +- Name: deeplabv3plus_r18b-d8_769x769_80k_cityscapes + In Collection: DeepLabV3+ + Metadata: + backbone: R-18b-D8 + crop size: (769,769) + lr schd: 80000 + inference time (ms/im): + - value: 167.79 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 2.4 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.36 + mIoU(ms+flip): 78.24 + Config: configs/deeplabv3plus/deeplabv3plus_r18b-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18b-d8_769x769_80k_cityscapes/deeplabv3plus_r18b-d8_769x769_80k_cityscapes_20201226_151312-2c868aff.pth +- Name: deeplabv3plus_r50b-d8_769x769_80k_cityscapes + In Collection: DeepLabV3+ + Metadata: + backbone: R-50b-D8 + crop size: (769,769) + lr schd: 80000 + inference time (ms/im): + - value: 581.4 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 8.4 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.41 + mIoU(ms+flip): 80.56 + Config: configs/deeplabv3plus/deeplabv3plus_r50b-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50b-d8_769x769_80k_cityscapes/deeplabv3plus_r50b-d8_769x769_80k_cityscapes_20201225_224655-8b596d1c.pth +- Name: deeplabv3plus_r101b-d8_769x769_80k_cityscapes + In Collection: DeepLabV3+ + Metadata: + backbone: R-101b-D8 + crop size: (769,769) + lr schd: 80000 + inference time (ms/im): + - value: 909.09 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 12.3 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.88 + mIoU(ms+flip): 81.46 + Config: configs/deeplabv3plus/deeplabv3plus_r101b-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101b-d8_769x769_80k_cityscapes/deeplabv3plus_r101b-d8_769x769_80k_cityscapes_20201226_205041-227cdf7c.pth +- Name: deeplabv3plus_r50-d8_512x512_80k_ade20k + In Collection: DeepLabV3+ + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 47.6 + hardware: V100 + backend: PyTorch + 
batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 10.6 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.72 + mIoU(ms+flip): 43.75 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k/deeplabv3plus_r50-d8_512x512_80k_ade20k_20200614_185028-bf1400d8.pth +- Name: deeplabv3plus_r101-d8_512x512_80k_ade20k + In Collection: DeepLabV3+ + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 70.62 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 14.1 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 44.6 + mIoU(ms+flip): 46.06 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k/deeplabv3plus_r101-d8_512x512_80k_ade20k_20200615_014139-d5730af7.pth +- Name: deeplabv3plus_r50-d8_512x512_160k_ade20k + In Collection: DeepLabV3+ + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.95 + mIoU(ms+flip): 44.93 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k/deeplabv3plus_r50-d8_512x512_160k_ade20k_20200615_124504-6135c7e0.pth +- Name: deeplabv3plus_r101-d8_512x512_160k_ade20k + In Collection: DeepLabV3+ + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.47 + mIoU(ms+flip): 46.35 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k/deeplabv3plus_r101-d8_512x512_160k_ade20k_20200615_123232-38ed86bb.pth +- Name: deeplabv3plus_r50-d8_512x512_20k_voc12aug + In Collection: DeepLabV3+ + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 47.62 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 7.6 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 75.93 + mIoU(ms+flip): 77.5 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug/deeplabv3plus_r50-d8_512x512_20k_voc12aug_20200617_102323-aad58ef1.pth +- Name: deeplabv3plus_r101-d8_512x512_20k_voc12aug + In Collection: DeepLabV3+ + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 72.05 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 11.0 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.22 + mIoU(ms+flip): 78.59 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug/deeplabv3plus_r101-d8_512x512_20k_voc12aug_20200617_102345-c7ff3d56.pth +- Name: deeplabv3plus_r50-d8_512x512_40k_voc12aug + In Collection: DeepLabV3+ + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.81 + mIoU(ms+flip): 77.57 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug/deeplabv3plus_r50-d8_512x512_40k_voc12aug_20200613_161759-e1b43aa9.pth +- Name: deeplabv3plus_r101-d8_512x512_40k_voc12aug + In Collection: DeepLabV3+ + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 78.62 + mIoU(ms+flip): 79.53 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug/deeplabv3plus_r101-d8_512x512_40k_voc12aug_20200613_205333-faf03387.pth +- Name: deeplabv3plus_r101-d8_480x480_40k_pascal_context + In Collection: DeepLabV3+ + Metadata: + backbone: R-101-D8 + crop size: (480,480) + lr schd: 40000 + inference time (ms/im): + - value: 110.01 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (480,480) + Results: + - Task: Semantic Segmentation + Dataset: Pascal Context + Metrics: + mIoU: 47.3 + mIoU(ms+flip): 48.47 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context/deeplabv3plus_r101-d8_480x480_40k_pascal_context_20200911_165459-d3c8a29e.pth +- Name: deeplabv3plus_r101-d8_480x480_80k_pascal_context + In Collection: DeepLabV3+ + Metadata: + backbone: R-101-D8 + crop size: (480,480) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal Context + Metrics: + mIoU: 47.23 + mIoU(ms+flip): 48.26 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context/deeplabv3plus_r101-d8_480x480_80k_pascal_context_20200911_155322-145d3ee8.pth +- Name: deeplabv3plus_r101-d8_480x480_40k_pascal_context_59 + In Collection: DeepLabV3+ + Metadata: + backbone: R-101-D8 + crop size: (480,480) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal Context 59 + Metrics: + mIoU: 52.86 + mIoU(ms+flip): 54.54 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59_20210416_111233-ed937f15.pth +- Name: deeplabv3plus_r101-d8_480x480_80k_pascal_context_59 + In Collection: DeepLabV3+ + Metadata: + backbone: R-101-D8 + crop size: (480,480) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal Context 59 + Metrics: + mIoU: 53.2 + mIoU(ms+flip): 54.67 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59_20210416_111127-7ca0331d.pth +- Name: deeplabv3plus_r18-d8_512x512_80k_loveda + In Collection: DeepLabV3+ + Metadata: + backbone: R-18-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 39.11 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 1.93 + Results: + - Task: Semantic Segmentation + Dataset: LoveDA + Metrics: + mIoU: 50.28 + mIoU(ms+flip): 50.47 + Config: configs/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_loveda.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_loveda/deeplabv3plus_r18-d8_512x512_80k_loveda_20211104_132800-ce0fa0ca.pth +- Name: deeplabv3plus_r50-d8_512x512_80k_loveda + In Collection: DeepLabV3+ + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 166.67 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 7.37 + Results: + - Task: Semantic Segmentation + Dataset: LoveDA + Metrics: + mIoU: 50.99 + mIoU(ms+flip): 50.65 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_loveda.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_loveda/deeplabv3plus_r50-d8_512x512_80k_loveda_20211105_080442-f0720392.pth +- Name: deeplabv3plus_r101-d8_512x512_80k_loveda + In Collection: DeepLabV3+ + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 230.95 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 10.84 + Results: + - Task: Semantic Segmentation + Dataset: LoveDA + Metrics: + mIoU: 51.47 + mIoU(ms+flip): 51.32 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_loveda.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_loveda/deeplabv3plus_r101-d8_512x512_80k_loveda_20211105_110759-4c1f297e.pth +- Name: deeplabv3plus_r18-d8_512x512_80k_potsdam + In Collection: DeepLabV3+ + Metadata: + backbone: R-18-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 12.24 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 1.91 + Results: + - Task: Semantic Segmentation + Dataset: Potsdam + Metrics: + mIoU: 77.09 + mIoU(ms+flip): 78.44 + Config: configs/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_potsdam.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_potsdam/deeplabv3plus_r18-d8_512x512_80k_potsdam_20211219_020601-75fd5bc3.pth +- Name: deeplabv3plus_r50-d8_512x512_80k_potsdam + In Collection: DeepLabV3+ + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 37.82 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 7.36 + Results: + - Task: Semantic Segmentation + Dataset: Potsdam + Metrics: + mIoU: 78.33 + mIoU(ms+flip): 79.27 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_potsdam.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_potsdam/deeplabv3plus_r50-d8_512x512_80k_potsdam_20211219_031508-7e7a2b24.pth +- Name: deeplabv3plus_r101-d8_512x512_80k_potsdam + In Collection: DeepLabV3+ + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 56.95 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 10.83 + Results: + - Task: Semantic Segmentation + Dataset: Potsdam + Metrics: + mIoU: 78.7 + mIoU(ms+flip): 79.47 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_potsdam.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_potsdam/deeplabv3plus_r101-d8_512x512_80k_potsdam_20211219_031508-8b112708.pth +- Name: deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen + In Collection: DeepLabV3+ + Metadata: + backbone: R-18-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 13.74 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 1.91 + Results: + - Task: Semantic Segmentation + Dataset: Vaihingen + Metrics: + mIoU: 72.5 + mIoU(ms+flip): 74.13 + Config: configs/deeplabv3plus/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen_20211231_230805-7626a263.pth +- Name: deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen + In Collection: DeepLabV3+ + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 37.16 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 7.36 + Results: + - Task: Semantic Segmentation + Dataset: Vaihingen + Metrics: + mIoU: 73.97 + mIoU(ms+flip): 75.05 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen_20211231_230816-5040938d.pth +- Name: deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen + In Collection: DeepLabV3+ + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 53.79 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 10.83 + Results: + - Task: Semantic Segmentation + Dataset: Vaihingen + Metrics: + mIoU: 73.06 + mIoU(ms+flip): 74.14 + Config: configs/deeplabv3plus/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen_20211231_230816-8a095afa.pth +- Name: deeplabv3plus_r18-d8_4x4_896x896_80k_isaid + In Collection: DeepLabV3+ + Metadata: + backbone: R-18-D8 + crop size: (896,896) + lr schd: 80000 + inference time (ms/im): + - value: 40.31 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (896,896) + Training Memory (GB): 6.19 + Results: + - Task: Semantic Segmentation + Dataset: iSAID + Metrics: + mIoU: 61.35 + mIoU(ms+flip): 62.61 + Config: configs/deeplabv3plus/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid_20220110_180526-7059991d.pth +- Name: deeplabv3plus_r50-d8_4x4_896x896_80k_isaid + In Collection: DeepLabV3+ + Metadata: + backbone: R-50-D8 + crop size: (896,896) + lr schd: 80000 + inference time (ms/im): + - value: 118.76 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (896,896) + Training Memory (GB): 21.45 + Results: + - Task: Semantic Segmentation + Dataset: iSAID + Metrics: + mIoU: 67.06 + mIoU(ms+flip): 68.02 + Config: configs/deeplabv3plus/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/deeplabv3plus/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid_20220110_180526-598be439.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..bf39d2f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_40k_cityscapes.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + depth=101, + dilations=(1, 1, 1, 2), + strides=(1, 2, 2, 1), + multi_grid=(1, 2, 4)), + decode_head=dict( + dilations=(1, 6, 12, 18), + sampler=dict(type='OHEMPixelSampler', min_kept=100000))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..c53ec41 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d16-mg124_512x1024_80k_cityscapes.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet101_v1c', + backbone=dict( + depth=101, + dilations=(1, 1, 1, 2), + strides=(1, 2, 2, 1), + multi_grid=(1, 2, 4)), + decode_head=dict( + dilations=(1, 6, 12, 18), + sampler=dict(type='OHEMPixelSampler', min_kept=100000))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context.py new file mode 100644 index 0000000..68e2b07 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_480x480_40k_pascal_context.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59.py new file mode 100644 index 0000000..36a510f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_40k_pascal_context_59.py @@ -0,0 +1,2 @@ 
+_base_ = './deeplabv3plus_r50-d8_480x480_40k_pascal_context_59.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000..3a46c28 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_480x480_80k_pascal_context.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59.py new file mode 100644 index 0000000..a6a7688 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_480x480_80k_pascal_context_59.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_480x480_80k_pascal_context_59.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen.py new file mode 100644 index 0000000..4bddf4f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_4x4_512x512_80k_vaihingen.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..d6ce85a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..0ebbd3c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..a75c9d3 --- /dev/null +++ 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000..ebb1a8e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000..3caa6cf --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..53fd3a9 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_loveda.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_loveda.py new file mode 100644 index 0000000..b3ad3ca --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_loveda.py @@ -0,0 +1,6 @@ +_base_ = './deeplabv3plus_r50-d8_512x512_80k_loveda.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet101_v1c'))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_potsdam.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_potsdam.py new file mode 100644 index 0000000..d894914 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_512x512_80k_potsdam.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_512x512_80k_potsdam.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000..c3c92eb --- /dev/null +++ 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_40k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './deeplabv3plus_r50-d8_769x769_40k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes.py
new file mode 100644
index 0000000..5ea9cdb
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes.py
@@ -0,0 +1,2 @@
+_base_ = './deeplabv3plus_r50-d8_769x769_80k_cityscapes.py'
+model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101))
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000..fc36940
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101-d8_fp16_512x1024_80k_cityscapes.py
@@ -0,0 +1,5 @@
+_base_ = './deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py'
+# fp16 settings
+optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.)
+# fp16 placeholder
+fp16 = dict()
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes.py
new file mode 100644
index 0000000..398d975
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101b-d8_512x1024_80k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = './deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py'
+model = dict(
+    pretrained='torchvision://resnet101',
+    backbone=dict(type='ResNet', depth=101))
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101b-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101b-d8_769x769_80k_cityscapes.py
new file mode 100644
index 0000000..1364490
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r101b-d8_769x769_80k_cityscapes.py
@@ -0,0 +1,4 @@
+_base_ = './deeplabv3plus_r50-d8_769x769_80k_cityscapes.py'
+model = dict(
+    pretrained='torchvision://resnet101',
+    backbone=dict(type='ResNet', depth=101))
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen.py
new file mode 100644
index 0000000..879e941
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r18-d8_4x4_512x512_80k_vaihingen.py
@@ -0,0 +1,11 @@
+_base_ = './deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen.py'
+model = dict(
+    pretrained='open-mmlab://resnet18_v1c',
+    backbone=dict(depth=18),
+    decode_head=dict(
+        c1_in_channels=64,
+        c1_channels=12,
+        in_channels=512,
+        channels=128,
+    ),
+    auxiliary_head=dict(in_channels=256, channels=64))
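
The two-line configs above rely on mmcv's `_base_` inheritance: each file names a base config and overrides only what differs (backbone depth and pretrained weights, or the FP16 hooks). A minimal sketch of how the merged result can be inspected, assuming mmcv 1.x as targeted by this vendored mmsegmentation v0.17.0; the printed values reflect the overrides shown in this patch:

```python
from mmcv import Config

# Loading a child config recursively merges its `_base_` file(s),
# then applies the overrides declared in the child on top.
cfg = Config.fromfile(
    'configs/deeplabv3plus/deeplabv3plus_r101-d8_769x769_80k_cityscapes.py')

print(cfg.model.backbone.depth)  # 101 -- overridden by the two-line child
print(cfg.model.pretrained)      # 'open-mmlab://resnet101_v1c'
print(cfg.model.decode_head)     # inherited unchanged from the r50 base
```

The same merged view is available from the command line via `tools/print_config.py <config>`.

diff --git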
a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid.py new file mode 100644 index 0000000..892a8a3 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r18-d8_4x4_896x896_80k_isaid.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3plus_r50-d8_4x4_896x896_80k_isaid.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + c1_in_channels=64, + c1_channels=12, + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..aff70c9 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + c1_in_channels=64, + c1_channels=12, + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_loveda.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_loveda.py new file mode 100644 index 0000000..11fe640 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_loveda.py @@ -0,0 +1,13 @@ +_base_ = './deeplabv3plus_r50-d8_512x512_80k_loveda.py' +model = dict( + backbone=dict( + depth=18, + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet18_v1c')), + decode_head=dict( + c1_in_channels=64, + c1_channels=12, + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_potsdam.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_potsdam.py new file mode 100644 index 0000000..ffb20df --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r18-d8_512x512_80k_potsdam.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3plus_r50-d8_512x512_80k_potsdam.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + c1_in_channels=64, + c1_channels=12, + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r18-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r18-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..0172d9a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r18-d8_769x769_80k_cityscapes.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3plus_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), 
+ decode_head=dict( + c1_in_channels=64, + c1_channels=12, + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..b90b292 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r18b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + c1_in_channels=64, + c1_channels=12, + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r18b-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r18b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..b49da35 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r18b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,11 @@ +_base_ = './deeplabv3plus_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + c1_in_channels=64, + c1_channels=12, + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_40k_pascal_context.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_40k_pascal_context.py new file mode 100644 index 0000000..318845d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_40k_pascal_context.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=60), + auxiliary_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_40k_pascal_context_59.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_40k_pascal_context_59.py new file mode 100644 index 0000000..f9e831b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_40k_pascal_context_59.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=59), + auxiliary_head=dict(num_classes=59), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_80k_pascal_context.py 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000..1736c23 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_80k_pascal_context.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=60), + auxiliary_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_80k_pascal_context_59.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_80k_pascal_context_59.py new file mode 100644 index 0000000..d2af575 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_480x480_80k_pascal_context_59.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=59), + auxiliary_head=dict(num_classes=59), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen.py new file mode 100644 index 0000000..fed9314 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_4x4_512x512_80k_vaihingen.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/vaihingen.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=6), auxiliary_head=dict(num_classes=6)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid.py new file mode 100644 index 0000000..a1a8beb --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_4x4_896x896_80k_isaid.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', '../_base_/datasets/isaid.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=16), auxiliary_head=dict(num_classes=16)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..7243d03 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + 
'../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..3304d36 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..1491e3b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000..1056ad4 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000..e36c83b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..352d870 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git 
a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_loveda.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_loveda.py new file mode 100644 index 0000000..62756f6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_loveda.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', '../_base_/datasets/loveda.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=7), auxiliary_head=dict(num_classes=7)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_potsdam.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_potsdam.py new file mode 100644 index 0000000..d5ae03f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_512x512_80k_potsdam.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/potsdam.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=6), auxiliary_head=dict(num_classes=6)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000..e4bda3e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..1420b97 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/deeplabv3plus_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..dd8e1da --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git 
a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50b-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..c0ba019 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/deeplabv3plus/deeplabv3plus_r50b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './deeplabv3plus_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/README.md new file mode 100644 index 0000000..301bd45 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/README.md @@ -0,0 +1,59 @@ +# DMNet + +[Dynamic Multi-scale Filters for Semantic Segmentation](https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf) + +## Introduction + + + +[Official Repo](https://github.com/Junjun2016/DMNet) + +[Code Snippet](https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dm_head.py#L93) + +## Abstract + + + +Multi-scale representation provides an effective way to address scale variation of objects and stuff in semantic segmentation. Previous works construct multi-scale representation by utilizing different filter sizes, expanding filter sizes with dilated filters or pooling grids, and the parameters of these filters are fixed after training. These methods often suffer from heavy computational cost or have more parameters, and are not adaptive to the input image during inference. To address these problems, this paper proposes a Dynamic Multi-scale Network (DMNet) to adaptively capture multi-scale contents for predicting pixel-level semantic labels. DMNet is composed of multiple Dynamic Convolutional Modules (DCMs) arranged in parallel, each of which exploits context-aware filters to estimate semantic representation for a specific scale. The outputs of multiple DCMs are further integrated for final segmentation. We conduct extensive experiments to evaluate our DMNet on three challenging semantic segmentation and scene parsing datasets: PASCAL VOC 2012, Pascal-Context, and ADE20K. DMNet achieves a new record 84.4% mIoU on the PASCAL VOC 2012 test set without MS COCO pre-training and post-processing, and also obtains state-of-the-art performance on Pascal-Context and ADE20K. + + +
+ +
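To make the DCM idea above concrete, here is a minimal PyTorch sketch of a single Dynamic Convolutional Module: the depth-wise filter is generated from the input itself, so it adapts to each image at inference time. This is an illustrative reduction written for this note, not the shipped implementation (that lives in `mmseg/models/decode_heads/dm_head.py`, linked above and from `dmnet.yml` below); the names `DCMSketch` and `filter_gen` are hypothetical.

```python
import torch
import torch.nn.functional as F
from torch import nn


class DCMSketch(nn.Module):
    """One Dynamic Convolutional Module for a single scale `filter_size`.

    The k x k depth-wise filter is generated from the input feature map
    (context-aware), so it adapts to each input at inference time.
    """

    def __init__(self, channels: int, filter_size: int):
        super().__init__()
        self.filter_size = filter_size
        # 1x1 conv turning the pooled context into per-channel filter weights
        self.filter_gen = nn.Conv2d(channels, channels, kernel_size=1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        b, c, h, w = x.shape
        k = self.filter_size
        # context-aware filters: pool the input down to k x k, then project
        filters = self.filter_gen(F.adaptive_avg_pool2d(x, k))  # (b, c, k, k)
        filters = filters.reshape(b * c, 1, k, k)
        # depth-wise dynamic convolution: one generated filter per channel
        out = F.conv2d(x.reshape(1, b * c, h, w), filters,
                       padding=(k - 1) // 2, groups=b * c)
        return out.reshape(b, c, out.shape[-2], out.shape[-1])


# DMNet runs several DCMs with different filter sizes in parallel and
# fuses their outputs for the final prediction.
feats = torch.randn(2, 512, 64, 64)
multi_scale = [DCMSketch(512, k)(feats) for k in (1, 3, 5)]
```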
+ +## Citation + +```bibtex +@InProceedings{He_2019_ICCV, +author = {He, Junjun and Deng, Zhongying and Qiao, Yu}, +title = {Dynamic Multi-Scale Filters for Semantic Segmentation}, +booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)}, +month = {October}, +year = {2019} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DMNet | R-50-D8 | 512x1024 | 40000 | 7.0 | 3.66 | 77.78 | 79.14 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes/dmnet_r50-d8_512x1024_40k_cityscapes_20201215_042326-615373cf.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes/dmnet_r50-d8_512x1024_40k_cityscapes-20201215_042326.log.json) | +| DMNet | R-101-D8 | 512x1024 | 40000 | 10.6 | 2.54 | 78.37 | 79.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes/dmnet_r101-d8_512x1024_40k_cityscapes_20201215_043100-8291e976.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes/dmnet_r101-d8_512x1024_40k_cityscapes-20201215_043100.log.json) | +| DMNet | R-50-D8 | 769x769 | 40000 | 7.9 | 1.57 | 78.49 | 80.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet/dmnet_r50-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_40k_cityscapes/dmnet_r50-d8_769x769_40k_cityscapes_20201215_093706-e7f0e23e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_40k_cityscapes/dmnet_r50-d8_769x769_40k_cityscapes-20201215_093706.log.json) | +| DMNet | R-101-D8 | 769x769 | 40000 | 12.0 | 1.01 | 77.62 | 78.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet/dmnet_r101-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_40k_cityscapes/dmnet_r101-d8_769x769_40k_cityscapes_20201215_081348-a74261f6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_40k_cityscapes/dmnet_r101-d8_769x769_40k_cityscapes-20201215_081348.log.json) | +| DMNet | R-50-D8 | 512x1024 | 80000 | - | - | 79.07 | 80.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes/dmnet_r50-d8_512x1024_80k_cityscapes_20201215_053728-3c8893b9.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes/dmnet_r50-d8_512x1024_80k_cityscapes-20201215_053728.log.json) | +| DMNet | R-101-D8 | 512x1024 | 80000 | - | - | 79.64 | 80.67 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes/dmnet_r101-d8_512x1024_80k_cityscapes_20201215_031718-fa081cb8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes/dmnet_r101-d8_512x1024_80k_cityscapes-20201215_031718.log.json) | +| DMNet | R-50-D8 | 769x769 | 80000 | - | - | 79.22 | 80.55 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet/dmnet_r50-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_80k_cityscapes/dmnet_r50-d8_769x769_80k_cityscapes_20201215_034006-6060840e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_80k_cityscapes/dmnet_r50-d8_769x769_80k_cityscapes-20201215_034006.log.json) | +| DMNet | R-101-D8 | 769x769 | 80000 | - | - | 79.19 | 80.65 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet/dmnet_r101-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_80k_cityscapes/dmnet_r101-d8_769x769_80k_cityscapes_20201215_082810-7f0de59a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_80k_cityscapes/dmnet_r101-d8_769x769_80k_cityscapes-20201215_082810.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DMNet | R-50-D8 | 512x512 | 80000 | 9.4 | 20.95 | 42.37 | 43.62 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet/dmnet_r50-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_80k_ade20k/dmnet_r50-d8_512x512_80k_ade20k_20201215_144744-f89092a6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_80k_ade20k/dmnet_r50-d8_512x512_80k_ade20k-20201215_144744.log.json) | +| DMNet | R-101-D8 | 512x512 | 80000 | 13.0 | 13.88 | 45.34 | 46.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet/dmnet_r101-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_80k_ade20k/dmnet_r101-d8_512x512_80k_ade20k_20201215_104812-bfa45311.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_80k_ade20k/dmnet_r101-d8_512x512_80k_ade20k-20201215_104812.log.json) | +| DMNet | R-50-D8 | 512x512 | 160000 | - | - | 43.15 | 44.17 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet/dmnet_r50-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_160k_ade20k/dmnet_r50-d8_512x512_160k_ade20k_20201215_115313-025ab3f9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_160k_ade20k/dmnet_r50-d8_512x512_160k_ade20k-20201215_115313.log.json) | +| DMNet | R-101-D8 | 512x512 | 160000 | - | - | 45.42 | 46.76 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet/dmnet_r101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_160k_ade20k/dmnet_r101-d8_512x512_160k_ade20k_20201215_111145-a0bc02ef.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_160k_ade20k/dmnet_r101-d8_512x512_160k_ade20k-20201215_111145.log.json) | diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet.yml new file mode 100644 index 0000000..1fab2dc --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet.yml @@ -0,0 +1,232 @@ +Collections: +- Name: DMNet + Metadata: + Training Data: + - Cityscapes + - ADE20K + Paper: + URL: https://openaccess.thecvf.com/content_ICCV_2019/papers/He_Dynamic_Multi-Scale_Filters_for_Semantic_Segmentation_ICCV_2019_paper.pdf + Title: Dynamic Multi-scale Filters for Semantic Segmentation + README: configs/dmnet/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dm_head.py#L93 + Version: v0.17.0 + Converted From: + Code: https://github.com/Junjun2016/DMNet +Models: +- Name: dmnet_r50-d8_512x1024_40k_cityscapes + In Collection: DMNet + Metadata: + backbone: R-50-D8 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 273.22 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 7.0 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.78 + mIoU(ms+flip): 79.14 + Config: configs/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes/dmnet_r50-d8_512x1024_40k_cityscapes_20201215_042326-615373cf.pth +- Name: dmnet_r101-d8_512x1024_40k_cityscapes + In Collection: DMNet + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 393.7 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 10.6 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.37 + mIoU(ms+flip): 79.72 + Config: configs/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes/dmnet_r101-d8_512x1024_40k_cityscapes_20201215_043100-8291e976.pth +- Name: dmnet_r50-d8_769x769_40k_cityscapes + In Collection: DMNet + Metadata: + backbone: R-50-D8 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - value: 636.94 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 7.9 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.49 + mIoU(ms+flip): 80.27 + Config: 
configs/dmnet/dmnet_r50-d8_769x769_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_40k_cityscapes/dmnet_r50-d8_769x769_40k_cityscapes_20201215_093706-e7f0e23e.pth +- Name: dmnet_r101-d8_769x769_40k_cityscapes + In Collection: DMNet + Metadata: + backbone: R-101-D8 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - value: 990.1 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 12.0 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.62 + mIoU(ms+flip): 78.94 + Config: configs/dmnet/dmnet_r101-d8_769x769_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_40k_cityscapes/dmnet_r101-d8_769x769_40k_cityscapes_20201215_081348-a74261f6.pth +- Name: dmnet_r50-d8_512x1024_80k_cityscapes + In Collection: DMNet + Metadata: + backbone: R-50-D8 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.07 + mIoU(ms+flip): 80.22 + Config: configs/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes/dmnet_r50-d8_512x1024_80k_cityscapes_20201215_053728-3c8893b9.pth +- Name: dmnet_r101-d8_512x1024_80k_cityscapes + In Collection: DMNet + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.64 + mIoU(ms+flip): 80.67 + Config: configs/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes/dmnet_r101-d8_512x1024_80k_cityscapes_20201215_031718-fa081cb8.pth +- Name: dmnet_r50-d8_769x769_80k_cityscapes + In Collection: DMNet + Metadata: + backbone: R-50-D8 + crop size: (769,769) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.22 + mIoU(ms+flip): 80.55 + Config: configs/dmnet/dmnet_r50-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_769x769_80k_cityscapes/dmnet_r50-d8_769x769_80k_cityscapes_20201215_034006-6060840e.pth +- Name: dmnet_r101-d8_769x769_80k_cityscapes + In Collection: DMNet + Metadata: + backbone: R-101-D8 + crop size: (769,769) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.19 + mIoU(ms+flip): 80.65 + Config: configs/dmnet/dmnet_r101-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_769x769_80k_cityscapes/dmnet_r101-d8_769x769_80k_cityscapes_20201215_082810-7f0de59a.pth +- Name: dmnet_r50-d8_512x512_80k_ade20k + In Collection: DMNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 47.73 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.4 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.37 + mIoU(ms+flip): 43.62 + Config: configs/dmnet/dmnet_r50-d8_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_80k_ade20k/dmnet_r50-d8_512x512_80k_ade20k_20201215_144744-f89092a6.pth +- Name: dmnet_r101-d8_512x512_80k_ade20k + In Collection: DMNet + Metadata: + backbone: 
R-101-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 72.05 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 13.0 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.34 + mIoU(ms+flip): 46.13 + Config: configs/dmnet/dmnet_r101-d8_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_80k_ade20k/dmnet_r101-d8_512x512_80k_ade20k_20201215_104812-bfa45311.pth +- Name: dmnet_r50-d8_512x512_160k_ade20k + In Collection: DMNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.15 + mIoU(ms+flip): 44.17 + Config: configs/dmnet/dmnet_r50-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r50-d8_512x512_160k_ade20k/dmnet_r50-d8_512x512_160k_ade20k_20201215_115313-025ab3f9.pth +- Name: dmnet_r101-d8_512x512_160k_ade20k + In Collection: DMNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.42 + mIoU(ms+flip): 46.76 + Config: configs/dmnet/dmnet_r101-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dmnet/dmnet_r101-d8_512x512_160k_ade20k/dmnet_r101-d8_512x512_160k_ade20k_20201215_111145-a0bc02ef.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..fd68976 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './dmnet_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..116cbdc --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './dmnet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r101-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..d78d46c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './dmnet_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r101-d8_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..9713b73 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './dmnet_r50-d8_512x512_80k_ade20k.py' +model = 
dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r101-d8_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000..6b222e7 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './dmnet_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r101-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..f36d490 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './dmnet_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..1f9a917 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/dmnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..1b38f90 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/dmnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r50-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..a8fbd9b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/dmnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r50-d8_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..74f6d6a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/dmnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git 
a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r50-d8_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000..1984154 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/dmnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r50-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..31d95f9 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dmnet/dmnet_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/dmnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/README.md new file mode 100644 index 0000000..975c4b0 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/README.md @@ -0,0 +1,62 @@ +# DNLNet + +[Disentangled Non-Local Neural Networks](https://arxiv.org/abs/2006.06668) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +The non-local block is a popular module for strengthening the context modeling ability of a regular convolutional neural network. This paper first studies the non-local block in depth, where we find that its attention computation can be split into two terms, a whitened pairwise term accounting for the relationship between two pixels and a unary term representing the saliency of every pixel. We also observe that the two terms trained alone tend to model different visual clues, e.g. the whitened pairwise term learns within-region relationships while the unary term learns salient boundaries. However, the two terms are tightly coupled in the non-local block, which hinders the learning of each. Based on these findings, we present the disentangled non-local block, where the two terms are decoupled to facilitate learning for both terms. We demonstrate the effectiveness of the decoupled design on various tasks, such as semantic segmentation on Cityscapes, ADE20K and PASCAL Context, object detection on COCO, and action recognition on Kinetics. + + + +
+ +
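The whitened-pairwise/unary split described in the abstract can be sketched in a few lines. Below is an illustrative PyTorch rendering written for this note, assuming the usual 1x1 query/key/value projections; the real head is `mmseg/models/decode_heads/dnl_head.py` (linked from `dnlnet.yml` below) and differs in details such as temperature scaling and the output projection. `DNLSketch` and `unary` are hypothetical names.

```python
import torch
from torch import nn


class DNLSketch(nn.Module):
    """Disentangled non-local attention: whitened pairwise term + unary term.

    Each term gets its own softmax, so the within-region affinity and the
    per-pixel saliency are learned independently instead of being coupled
    inside one dot product as in the standard non-local block.
    """

    def __init__(self, channels: int, inter_channels: int):
        super().__init__()
        self.q = nn.Conv2d(channels, inter_channels, 1)
        self.k = nn.Conv2d(channels, inter_channels, 1)
        self.v = nn.Conv2d(channels, channels, 1)
        self.unary = nn.Conv2d(channels, 1, 1)  # per-pixel saliency logits

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        b, c, h, w = x.shape
        q = self.q(x).flatten(2)  # (b, ci, hw)
        k = self.k(x).flatten(2)  # (b, ci, hw)
        v = self.v(x).flatten(2)  # (b, c, hw)
        # whiten: subtract the spatial mean so the pairwise term no longer
        # mixes in each pixel's overall saliency
        q = q - q.mean(dim=2, keepdim=True)
        k = k - k.mean(dim=2, keepdim=True)
        pairwise = torch.softmax(q.transpose(1, 2) @ k, dim=-1)  # (b, hw, hw)
        unary = torch.softmax(self.unary(x).flatten(2), dim=-1)  # (b, 1, hw)
        attn = pairwise + unary          # unary row is shared by every query
        out = v @ attn.transpose(1, 2)   # aggregate values, (b, c, hw)
        return x + out.reshape(b, c, h, w)


y = DNLSketch(512, 256)(torch.randn(2, 512, 32, 32))  # -> (2, 512, 32, 32)
```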
+ +This example is to reproduce ["Disentangled Non-Local Neural Networks"](https://arxiv.org/abs/2006.06668) for semantic segmentation. It is still in progress. + +## Citation + +```bibtex +@inproceedings{yin2020disentangled, + title={Disentangled Non-Local Neural Networks}, + author={Minghao Yin and Zhuliang Yao and Yue Cao and Xiu Li and Zheng Zhang and Stephen Lin and Han Hu}, + year={2020}, + booktitle={ECCV} +} +``` + +## Results and models (in progress) + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ------ | -------- | +| DNLNet | R-50-D8 | 512x1024 | 40000 | 7.3 | 2.56 | 78.61 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes/dnl_r50-d8_512x1024_40k_cityscapes_20200904_233629-53d4ea93.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes/dnl_r50-d8_512x1024_40k_cityscapes-20200904_233629.log.json) | +| DNLNet | R-101-D8 | 512x1024 | 40000 | 10.9 | 1.96 | 78.31 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet/dnl_r101-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_40k_cityscapes/dnl_r101-d8_512x1024_40k_cityscapes_20200904_233629-9928ffef.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_40k_cityscapes/dnl_r101-d8_512x1024_40k_cityscapes-20200904_233629.log.json) | +| DNLNet | R-50-D8 | 769x769 | 40000 | 9.2 | 1.50 | 78.44 | 80.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet/dnl_r50-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_40k_cityscapes/dnl_r50-d8_769x769_40k_cityscapes_20200820_232206-0f283785.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_40k_cityscapes/dnl_r50-d8_769x769_40k_cityscapes-20200820_232206.log.json) | +| DNLNet | R-101-D8 | 769x769 | 40000 | 12.6 | 1.02 | 76.39 | 77.77 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet/dnl_r101-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_40k_cityscapes/dnl_r101-d8_769x769_40k_cityscapes_20200820_171256-76c596df.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_40k_cityscapes/dnl_r101-d8_769x769_40k_cityscapes-20200820_171256.log.json) | +| DNLNet | R-50-D8 | 512x1024 | 80000 | - | - | 79.33 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes/dnl_r50-d8_512x1024_80k_cityscapes_20200904_233629-58b2f778.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes/dnl_r50-d8_512x1024_80k_cityscapes-20200904_233629.log.json) | +| DNLNet | R-101-D8 | 512x1024 | 80000 | - | - | 80.41 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet/dnl_r101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_80k_cityscapes/dnl_r101-d8_512x1024_80k_cityscapes_20200904_233629-758e2dd4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_80k_cityscapes/dnl_r101-d8_512x1024_80k_cityscapes-20200904_233629.log.json) | +| DNLNet | R-50-D8 | 769x769 | 80000 | - | - | 79.36 | 80.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet/dnl_r50-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_80k_cityscapes/dnl_r50-d8_769x769_80k_cityscapes_20200820_011925-366bc4c7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_80k_cityscapes/dnl_r50-d8_769x769_80k_cityscapes-20200820_011925.log.json) | +| DNLNet | R-101-D8 | 769x769 | 80000 | - | - | 79.41 | 80.68 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet/dnl_r101-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_80k_cityscapes/dnl_r101-d8_769x769_80k_cityscapes_20200821_051111-95ff84ab.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_80k_cityscapes/dnl_r101-d8_769x769_80k_cityscapes-20200821_051111.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | -------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| DNLNet | R-50-D8 | 512x512 | 80000 | 8.8 | 20.66 | 41.76 | 42.99 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet/dnl_r50-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_80k_ade20k/dnl_r50-d8_512x512_80k_ade20k_20200826_183354-1cf6e0c1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_80k_ade20k/dnl_r50-d8_512x512_80k_ade20k-20200826_183354.log.json) | +| DNLNet | R-101-D8 | 512x512 | 80000 | 12.8 | 12.54 | 43.76 | 44.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet/dnl_r101-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_80k_ade20k/dnl_r101-d8_512x512_80k_ade20k_20200826_183354-d820d6ea.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_80k_ade20k/dnl_r101-d8_512x512_80k_ade20k-20200826_183354.log.json) | +| 
DNLNet | R-50-D8 | 512x512 | 160000 | - | - | 41.87 | 43.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet/dnl_r50-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_160k_ade20k/dnl_r50-d8_512x512_160k_ade20k_20200826_183350-37837798.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_160k_ade20k/dnl_r50-d8_512x512_160k_ade20k-20200826_183350.log.json) | +| DNLNet | R-101-D8 | 512x512 | 160000 | - | - | 44.25 | 45.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet/dnl_r101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_160k_ade20k/dnl_r101-d8_512x512_160k_ade20k_20200826_183350-ed522c61.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_160k_ade20k/dnl_r101-d8_512x512_160k_ade20k-20200826_183350.log.json) | diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r101-d8_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..1a36e3c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './dnl_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r101-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..0f2e1b6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './dnl_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r101-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..aca44e4 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './dnl_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r101-d8_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..ebd27a1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './dnl_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r101-d8_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000..575e9d0 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './dnl_r50-d8_769x769_40k_cityscapes.py' +model = 
dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r101-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..4f1b9e1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './dnl_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..f7aa744 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/dnl_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..fdff93f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/dnl_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r50-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..5305689 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/dnl_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r50-d8_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..09604c3 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/dnl_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r50-d8_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000..0666199 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/dnl_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + 
decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r50-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..f7b07c4 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnl_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,12 @@ +_base_ = [ + '../_base_/models/dnl_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) +optimizer = dict( + paramwise_cfg=dict( + custom_keys=dict(theta=dict(wd_mult=0.), phi=dict(wd_mult=0.)))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnlnet.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnlnet.yml new file mode 100644 index 0000000..8ee7b54 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dnlnet/dnlnet.yml @@ -0,0 +1,228 @@ +Collections: +- Name: DNLNet + Metadata: + Training Data: + - Cityscapes + - ADE20K + Paper: + URL: https://arxiv.org/abs/2006.06668 + Title: Disentangled Non-Local Neural Networks + README: configs/dnlnet/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dnl_head.py#L88 + Version: v0.17.0 + Converted From: + Code: https://github.com/yinmh17/DNL-Semantic-Segmentation +Models: +- Name: dnl_r50-d8_512x1024_40k_cityscapes + In Collection: DNLNet + Metadata: + backbone: R-50-D8 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 390.62 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 7.3 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.61 + Config: configs/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_40k_cityscapes/dnl_r50-d8_512x1024_40k_cityscapes_20200904_233629-53d4ea93.pth +- Name: dnl_r101-d8_512x1024_40k_cityscapes + In Collection: DNLNet + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 510.2 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 10.9 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.31 + Config: configs/dnlnet/dnl_r101-d8_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_40k_cityscapes/dnl_r101-d8_512x1024_40k_cityscapes_20200904_233629-9928ffef.pth +- Name: dnl_r50-d8_769x769_40k_cityscapes + In Collection: DNLNet + Metadata: + backbone: R-50-D8 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - value: 666.67 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 9.2 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.44 + mIoU(ms+flip): 80.27 + Config: configs/dnlnet/dnl_r50-d8_769x769_40k_cityscapes.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_40k_cityscapes/dnl_r50-d8_769x769_40k_cityscapes_20200820_232206-0f283785.pth +- Name: dnl_r101-d8_769x769_40k_cityscapes + In Collection: DNLNet + Metadata: + backbone: R-101-D8 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - value: 980.39 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 12.6 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.39 + mIoU(ms+flip): 77.77 + Config: configs/dnlnet/dnl_r101-d8_769x769_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_40k_cityscapes/dnl_r101-d8_769x769_40k_cityscapes_20200820_171256-76c596df.pth +- Name: dnl_r50-d8_512x1024_80k_cityscapes + In Collection: DNLNet + Metadata: + backbone: R-50-D8 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.33 + Config: configs/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x1024_80k_cityscapes/dnl_r50-d8_512x1024_80k_cityscapes_20200904_233629-58b2f778.pth +- Name: dnl_r101-d8_512x1024_80k_cityscapes + In Collection: DNLNet + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.41 + Config: configs/dnlnet/dnl_r101-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x1024_80k_cityscapes/dnl_r101-d8_512x1024_80k_cityscapes_20200904_233629-758e2dd4.pth +- Name: dnl_r50-d8_769x769_80k_cityscapes + In Collection: DNLNet + Metadata: + backbone: R-50-D8 + crop size: (769,769) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.36 + mIoU(ms+flip): 80.7 + Config: configs/dnlnet/dnl_r50-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_769x769_80k_cityscapes/dnl_r50-d8_769x769_80k_cityscapes_20200820_011925-366bc4c7.pth +- Name: dnl_r101-d8_769x769_80k_cityscapes + In Collection: DNLNet + Metadata: + backbone: R-101-D8 + crop size: (769,769) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.41 + mIoU(ms+flip): 80.68 + Config: configs/dnlnet/dnl_r101-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_769x769_80k_cityscapes/dnl_r101-d8_769x769_80k_cityscapes_20200821_051111-95ff84ab.pth +- Name: dnl_r50-d8_512x512_80k_ade20k + In Collection: DNLNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 48.4 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 8.8 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.76 + mIoU(ms+flip): 42.99 + Config: configs/dnlnet/dnl_r50-d8_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_80k_ade20k/dnl_r50-d8_512x512_80k_ade20k_20200826_183354-1cf6e0c1.pth +- Name: dnl_r101-d8_512x512_80k_ade20k + In Collection: DNLNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 79.74 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: 
FP32 + resolution: (512,512) + Training Memory (GB): 12.8 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.76 + mIoU(ms+flip): 44.91 + Config: configs/dnlnet/dnl_r101-d8_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_80k_ade20k/dnl_r101-d8_512x512_80k_ade20k_20200826_183354-d820d6ea.pth +- Name: dnl_r50-d8_512x512_160k_ade20k + In Collection: DNLNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.87 + mIoU(ms+flip): 43.01 + Config: configs/dnlnet/dnl_r50-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r50-d8_512x512_160k_ade20k/dnl_r50-d8_512x512_160k_ade20k_20200826_183350-37837798.pth +- Name: dnl_r101-d8_512x512_160k_ade20k + In Collection: DNLNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 44.25 + mIoU(ms+flip): 45.78 + Config: configs/dnlnet/dnl_r101-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dnlnet/dnl_r101-d8_512x512_160k_ade20k/dnl_r101-d8_512x512_160k_ade20k_20200826_183350-ed522c61.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dpt/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dpt/README.md new file mode 100644 index 0000000..5e62577 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dpt/README.md @@ -0,0 +1,67 @@ +# DPT + +[Vision Transformer for Dense Prediction](https://arxiv.org/abs/2103.13413) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +We introduce dense vision transformers, an architecture that leverages vision transformers in place of convolutional networks as a backbone for dense prediction tasks. We assemble tokens from various stages of the vision transformer into image-like representations at various resolutions and progressively combine them into full-resolution predictions using a convolutional decoder. The transformer backbone processes representations at a constant and relatively high resolution and has a global receptive field at every stage. These properties allow the dense vision transformer to provide finer-grained and more globally coherent predictions when compared to fully-convolutional networks. Our experiments show that this architecture yields substantial improvements on dense prediction tasks, especially when a large amount of training data is available. For monocular depth estimation, we observe an improvement of up to 28% in relative performance when compared to a state-of-the-art fully-convolutional network. When applied to semantic segmentation, dense vision transformers set a new state of the art on ADE20K with 49.02% mIoU. We further show that the architecture can be fine-tuned on smaller datasets such as NYUv2, KITTI, and Pascal Context where it also sets the new state of the art. Our models are available at [this https URL](https://github.com/isl-org/DPT). + + + +
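The reassembly step the abstract alludes to is the heart of DPT: patch tokens from several transformer stages are reshaped back into spatial grids before a convolutional decoder fuses them into a full-resolution prediction. A minimal sketch of that reshaping, with hypothetical names (the real implementation lives in `mmseg/models/decode_heads/dpt_head.py`):

```python
import torch

def reassemble_tokens(tokens: torch.Tensor, grid_h: int, grid_w: int) -> torch.Tensor:
    """Turn ViT patch tokens (B, N, C) into an image-like map (B, C, H, W).

    Assumes the class token has already been dropped, so N == grid_h * grid_w.
    """
    b, n, c = tokens.shape
    assert n == grid_h * grid_w, 'token count must match the patch grid'
    return tokens.transpose(1, 2).reshape(b, c, grid_h, grid_w)

# A ViT-B/16 on a 512x512 crop yields a 32x32 grid of 768-d tokens.
feature_map = reassemble_tokens(torch.randn(1, 32 * 32, 768), 32, 32)
```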
+ +## Citation + +```bibtex +@article{dosoViTskiy2020, + title={An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale}, + author={DosoViTskiy, Alexey and Beyer, Lucas and Kolesnikov, Alexander and Weissenborn, Dirk and Zhai, Xiaohua and Unterthiner, Thomas and Dehghani, Mostafa and Minderer, Matthias and Heigold, Georg and Gelly, Sylvain and Uszkoreit, Jakob and Houlsby, Neil}, + journal={arXiv preprint arXiv:2010.11929}, + year={2020} +} + +@article{Ranftl2021, + author = {Ren\'{e} Ranftl and Alexey Bochkovskiy and Vladlen Koltun}, + title = {Vision Transformers for Dense Prediction}, + journal = {ArXiv preprint}, + year = {2021}, +} +``` + +## Usage + +To use other repositories' pre-trained models, it is necessary to convert keys. + +We provide a script [`vit2mmseg.py`](../../tools/model_converters/vit2mmseg.py) in the tools directory to convert the key of models from [timm](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py) to MMSegmentation style. + +```shell +python tools/model_converters/vit2mmseg.py ${PRETRAIN_PATH} ${STORE_PATH} +``` + +E.g. + +```shell +python tools/model_converters/vit2mmseg.py https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth pretrain/jx_vit_base_p16_224-80ecf9dd.pth +``` + +This script convert model from `PRETRAIN_PATH` and store the converted model in `STORE_PATH`. + +## Results and models + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| DPT | ViT-B | 512x512 | 160000 | 8.09 | 10.41 | 46.97 | 48.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dpt/dpt_vit-b16_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/dpt/dpt_vit-b16_512x512_160k_ade20k/dpt_vit-b16_512x512_160k_ade20k-db31cf52.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/dpt/dpt_vit-b16_512x512_160k_ade20k/dpt_vit-b16_512x512_160k_ade20k-20210809_172025.log.json) | diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dpt/dpt.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dpt/dpt.yml new file mode 100644 index 0000000..a4f9c65 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dpt/dpt.yml @@ -0,0 +1,37 @@ +Collections: +- Name: DPT + Metadata: + Training Data: + - ADE20K + Paper: + URL: https://arxiv.org/abs/2103.13413 + Title: Vision Transformer for Dense Prediction + README: configs/dpt/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/dpt_head.py#L215 + Version: v0.17.0 + Converted From: + Code: https://github.com/isl-org/DPT +Models: +- Name: dpt_vit-b16_512x512_160k_ade20k + In Collection: DPT + Metadata: + backbone: ViT-B + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 96.06 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + 
Training Memory (GB): 8.09 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 46.97 + mIoU(ms+flip): 48.34 + Config: configs/dpt/dpt_vit-b16_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/dpt/dpt_vit-b16_512x512_160k_ade20k/dpt_vit-b16_512x512_160k_ade20k-db31cf52.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dpt/dpt_vit-b16_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dpt/dpt_vit-b16_512x512_160k_ade20k.py new file mode 100644 index 0000000..c751a68 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/dpt/dpt_vit-b16_512x512_160k_ade20k.py @@ -0,0 +1,32 @@ +_base_ = [ + '../_base_/models/dpt_vit-b16.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) + })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2, workers_per_gpu=2) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/emanet/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/emanet/README.md new file mode 100644 index 0000000..3e5752b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/emanet/README.md @@ -0,0 +1,46 @@ +# EMANet + +[Expectation-Maximization Attention Networks for Semantic Segmentation](https://arxiv.org/abs/1907.13426) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Self-attention mechanism has been widely used for various tasks. It is designed to compute the representation of each position by a weighted sum of the features at all positions. Thus, it can capture long-range relations for computer vision tasks. However, it is computationally consuming. Since the attention maps are computed w.r.t all other positions. In this paper, we formulate the attention mechanism into an expectation-maximization manner and iteratively estimate a much more compact set of bases upon which the attention maps are computed. By a weighted summation upon these bases, the resulting representation is low-rank and deprecates noisy information from the input. The proposed Expectation-Maximization Attention (EMA) module is robust to the variance of input and is also friendly in memory and computation. Moreover, we set up the bases maintenance and normalization methods to stabilize its training procedure. We conduct extensive experiments on popular semantic segmentation benchmarks including PASCAL VOC, PASCAL Context and COCO Stuff, on which we set new records. + + + +
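The expectation-maximization loop described above alternates between soft-assigning pixels to a small set of bases (E-step) and re-estimating the bases from those assignments (M-step). A minimal sketch with hypothetical tensor shapes, not the mmseg implementation (see `mmseg/models/decode_heads/ema_head.py`):

```python
import torch
import torch.nn.functional as F

def em_attention(x: torch.Tensor, mu: torch.Tensor, iters: int = 3) -> torch.Tensor:
    """EM attention sketch: x is (B, C, N) pixel features, mu is (B, C, K) bases, K << N."""
    for _ in range(iters):
        # E-step: responsibilities of each basis for each pixel.
        z = F.softmax(torch.einsum('bcn,bck->bnk', x, mu), dim=2)   # (B, N, K)
        # M-step: bases become responsibility-weighted means of the features.
        w = z / (z.sum(dim=1, keepdim=True) + 1e-6)
        mu = F.normalize(torch.einsum('bcn,bnk->bck', x, w), dim=1)
    # Low-rank reconstruction of the features from the compact bases.
    return torch.einsum('bck,bnk->bcn', mu, z)

# e.g. 64 bases summarizing 4096 spatial positions of a 512-channel map
out = em_attention(torch.randn(2, 512, 64 * 64), torch.randn(2, 512, 64))
```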
+ +## Citation + +```bibtex +@inproceedings{li2019expectation, + title={Expectation-maximization attention networks for semantic segmentation}, + author={Li, Xia and Zhong, Zhisheng and Wu, Jianlong and Yang, Yibo and Lin, Zhouchen and Liu, Hong}, + booktitle={Proceedings of the IEEE International Conference on Computer Vision}, + pages={9167--9176}, + year={2019} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | --------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| EMANet | R-50-D8 | 512x1024 | 80000 | 5.4 | 4.58 | 77.59 | 79.44 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/emanet/emanet_r50-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_512x1024_80k_cityscapes/emanet_r50-d8_512x1024_80k_cityscapes_20200901_100301-c43fcef1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_512x1024_80k_cityscapes/emanet_r50-d8_512x1024_80k_cityscapes-20200901_100301.log.json) | +| EMANet | R-101-D8 | 512x1024 | 80000 | 6.2 | 2.87 | 79.10 | 81.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/emanet/emanet_r101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_512x1024_80k_cityscapes/emanet_r101-d8_512x1024_80k_cityscapes_20200901_100301-2d970745.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_512x1024_80k_cityscapes/emanet_r101-d8_512x1024_80k_cityscapes-20200901_100301.log.json) | +| EMANet | R-50-D8 | 769x769 | 80000 | 8.9 | 1.97 | 79.33 | 80.49 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/emanet/emanet_r50-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_769x769_80k_cityscapes/emanet_r50-d8_769x769_80k_cityscapes_20200901_100301-16f8de52.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_769x769_80k_cityscapes/emanet_r50-d8_769x769_80k_cityscapes-20200901_100301.log.json) | +| EMANet | R-101-D8 | 769x769 | 80000 | 10.1 | 1.22 | 79.62 | 81.00 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/emanet/emanet_r101-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_769x769_80k_cityscapes/emanet_r101-d8_769x769_80k_cityscapes_20200901_100301-47a324ce.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_769x769_80k_cityscapes/emanet_r101-d8_769x769_80k_cityscapes-20200901_100301.log.json) | diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/emanet/emanet.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/emanet/emanet.yml new file mode 100644 index 0000000..22ebcdb --- /dev/null +++ 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/emanet/emanet.yml @@ -0,0 +1,103 @@ +Collections: +- Name: EMANet + Metadata: + Training Data: + - Cityscapes + Paper: + URL: https://arxiv.org/abs/1907.13426 + Title: Expectation-Maximization Attention Networks for Semantic Segmentation + README: configs/emanet/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ema_head.py#L80 + Version: v0.17.0 + Converted From: + Code: https://xialipku.github.io/EMANet +Models: +- Name: emanet_r50-d8_512x1024_80k_cityscapes + In Collection: EMANet + Metadata: + backbone: R-50-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 218.34 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 5.4 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.59 + mIoU(ms+flip): 79.44 + Config: configs/emanet/emanet_r50-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_512x1024_80k_cityscapes/emanet_r50-d8_512x1024_80k_cityscapes_20200901_100301-c43fcef1.pth +- Name: emanet_r101-d8_512x1024_80k_cityscapes + In Collection: EMANet + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 348.43 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 6.2 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.1 + mIoU(ms+flip): 81.21 + Config: configs/emanet/emanet_r101-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_512x1024_80k_cityscapes/emanet_r101-d8_512x1024_80k_cityscapes_20200901_100301-2d970745.pth +- Name: emanet_r50-d8_769x769_80k_cityscapes + In Collection: EMANet + Metadata: + backbone: R-50-D8 + crop size: (769,769) + lr schd: 80000 + inference time (ms/im): + - value: 507.61 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 8.9 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.33 + mIoU(ms+flip): 80.49 + Config: configs/emanet/emanet_r50-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r50-d8_769x769_80k_cityscapes/emanet_r50-d8_769x769_80k_cityscapes_20200901_100301-16f8de52.pth +- Name: emanet_r101-d8_769x769_80k_cityscapes + In Collection: EMANet + Metadata: + backbone: R-101-D8 + crop size: (769,769) + lr schd: 80000 + inference time (ms/im): + - value: 819.67 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 10.1 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.62 + mIoU(ms+flip): 81.0 + Config: configs/emanet/emanet_r101-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/emanet/emanet_r101-d8_769x769_80k_cityscapes/emanet_r101-d8_769x769_80k_cityscapes_20200901_100301-47a324ce.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/emanet/emanet_r101-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/emanet/emanet_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..58f28b4 --- /dev/null +++ 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/emanet/emanet_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './emanet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/emanet/emanet_r101-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/emanet/emanet_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..c5dbf20 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/emanet/emanet_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './emanet_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/emanet/emanet_r50-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/emanet/emanet_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..73b7788 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/emanet/emanet_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/emanet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/emanet/emanet_r50-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/emanet/emanet_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..699aa21 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/emanet/emanet_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/emanet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/README.md new file mode 100644 index 0000000..c191943 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/README.md @@ -0,0 +1,59 @@ +# EncNet + +[Context Encoding for Semantic Segmentation](https://arxiv.org/abs/1803.08904) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Recent work has made significant progress in improving spatial resolution for pixelwise labeling with Fully Convolutional Network (FCN) framework by employing Dilated/Atrous convolution, utilizing multi-scale features and refining boundaries. In this paper, we explore the impact of global contextual information in semantic segmentation by introducing the Context Encoding Module, which captures the semantic context of scenes and selectively highlights class-dependent featuremaps. The proposed Context Encoding Module significantly improves semantic segmentation results with only marginal extra computation cost over FCN. Our approach has achieved new state-of-the-art results 51.7% mIoU on PASCAL-Context, 85.9% mIoU on PASCAL VOC 2012. Our single model achieves a final score of 0.5567 on ADE20K test set, which surpass the winning entry of COCO-Place Challenge in 2017. 
In addition, we also explore how the Context Encoding Module can improve the feature representation of relatively shallow networks for image classification on the CIFAR-10 dataset. Our 14-layer network achieves an error rate of 3.45%, which is comparable with state-of-the-art approaches that have over 10 times more layers. The source code for the complete system is publicly available. + + + +
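The Context Encoding Module turns a global scene descriptor into per-channel scaling factors that emphasize class-dependent feature maps. A simplified sketch of that data flow, using global average pooling as a stand-in for the learned dictionary-based Encoding layer (the real head is `mmseg/models/decode_heads/enc_head.py`):

```python
import torch
import torch.nn as nn

class ContextScaling(nn.Module):
    """Channel re-weighting from a global context vector (EncNet-style sketch)."""

    def __init__(self, channels: int):
        super().__init__()
        self.fc = nn.Sequential(nn.Linear(channels, channels), nn.Sigmoid())

    def forward(self, feat: torch.Tensor) -> torch.Tensor:   # feat: (B, C, H, W)
        context = feat.mean(dim=(2, 3))              # stand-in for the Encoding layer
        gamma = self.fc(context)[:, :, None, None]   # per-channel scale in (0, 1)
        return feat * gamma                          # highlight class-dependent channels
```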
+ +## Citation + +```bibtex +@InProceedings{Zhang_2018_CVPR, +author = {Zhang, Hang and Dana, Kristin and Shi, Jianping and Zhang, Zhongyue and Wang, Xiaogang and Tyagi, Ambrish and Agrawal, Amit}, +title = {Context Encoding for Semantic Segmentation}, +booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, +month = {June}, +year = {2018} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| EncNet | R-50-D8 | 512x1024 | 40000 | 8.6 | 4.58 | 75.67 | 77.08 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet/encnet_r50-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_40k_cityscapes/encnet_r50-d8_512x1024_40k_cityscapes_20200621_220958-68638a47.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_40k_cityscapes/encnet_r50-d8_512x1024_40k_cityscapes-20200621_220958.log.json) | +| EncNet | R-101-D8 | 512x1024 | 40000 | 12.1 | 2.66 | 75.81 | 77.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet/encnet_r101-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_40k_cityscapes/encnet_r101-d8_512x1024_40k_cityscapes_20200621_220933-35e0a3e8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_40k_cityscapes/encnet_r101-d8_512x1024_40k_cityscapes-20200621_220933.log.json) | +| EncNet | R-50-D8 | 769x769 | 40000 | 9.8 | 1.82 | 76.24 | 77.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet/encnet_r50-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_40k_cityscapes/encnet_r50-d8_769x769_40k_cityscapes_20200621_220958-3bcd2884.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_40k_cityscapes/encnet_r50-d8_769x769_40k_cityscapes-20200621_220958.log.json) | +| EncNet | R-101-D8 | 769x769 | 40000 | 13.7 | 1.26 | 74.25 | 76.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet/encnet_r101-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_40k_cityscapes/encnet_r101-d8_769x769_40k_cityscapes_20200621_220933-2fafed55.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_40k_cityscapes/encnet_r101-d8_769x769_40k_cityscapes-20200621_220933.log.json) | +| EncNet | R-50-D8 | 512x1024 | 80000 | - | - | 77.94 | 79.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet/encnet_r50-d8_512x1024_80k_cityscapes.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_80k_cityscapes/encnet_r50-d8_512x1024_80k_cityscapes_20200622_003554-fc5c5624.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_80k_cityscapes/encnet_r50-d8_512x1024_80k_cityscapes-20200622_003554.log.json) | +| EncNet | R-101-D8 | 512x1024 | 80000 | - | - | 78.55 | 79.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet/encnet_r101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_80k_cityscapes/encnet_r101-d8_512x1024_80k_cityscapes_20200622_003555-1de64bec.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_80k_cityscapes/encnet_r101-d8_512x1024_80k_cityscapes-20200622_003555.log.json) | +| EncNet | R-50-D8 | 769x769 | 80000 | - | - | 77.44 | 78.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet/encnet_r50-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_80k_cityscapes/encnet_r50-d8_769x769_80k_cityscapes_20200622_003554-55096dcb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_80k_cityscapes/encnet_r50-d8_769x769_80k_cityscapes-20200622_003554.log.json) | +| EncNet | R-101-D8 | 769x769 | 80000 | - | - | 76.10 | 76.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet/encnet_r101-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_80k_cityscapes/encnet_r101-d8_769x769_80k_cityscapes_20200622_003555-470ef79d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_80k_cityscapes/encnet_r101-d8_769x769_80k_cityscapes-20200622_003555.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| EncNet | R-50-D8 | 512x512 | 80000 | 10.1 | 22.81 | 39.53 | 41.17 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet/encnet_r50-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_80k_ade20k/encnet_r50-d8_512x512_80k_ade20k_20200622_042412-44b46b04.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_80k_ade20k/encnet_r50-d8_512x512_80k_ade20k-20200622_042412.log.json) | +| EncNet | R-101-D8 | 512x512 | 80000 | 13.6 | 14.87 | 42.11 | 43.61 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet/encnet_r101-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_80k_ade20k/encnet_r101-d8_512x512_80k_ade20k_20200622_101128-dd35e237.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_80k_ade20k/encnet_r101-d8_512x512_80k_ade20k-20200622_101128.log.json) | +| EncNet | R-50-D8 | 512x512 | 160000 | - | - | 40.10 | 41.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet/encnet_r50-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_160k_ade20k/encnet_r50-d8_512x512_160k_ade20k_20200622_101059-b2db95e0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_160k_ade20k/encnet_r50-d8_512x512_160k_ade20k-20200622_101059.log.json) | +| EncNet | R-101-D8 | 512x512 | 160000 | - | - | 42.61 | 44.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet/encnet_r101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_160k_ade20k/encnet_r101-d8_512x512_160k_ade20k_20200622_073348-7989641f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_160k_ade20k/encnet_r101-d8_512x512_160k_ade20k-20200622_073348.log.json) | diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet.yml new file mode 100644 index 0000000..18fb32a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet.yml @@ -0,0 +1,232 @@ +Collections: +- Name: EncNet + Metadata: + Training Data: + - Cityscapes + - ADE20K + Paper: + URL: https://arxiv.org/abs/1803.08904 + Title: Context Encoding for Semantic Segmentation + README: configs/encnet/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/enc_head.py#L63 + Version: v0.17.0 + Converted From: + Code: https://github.com/zhanghang1989/PyTorch-Encoding +Models: +- Name: encnet_r50-d8_512x1024_40k_cityscapes + In Collection: EncNet + Metadata: + backbone: R-50-D8 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 218.34 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 8.6 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.67 + mIoU(ms+flip): 77.08 + Config: configs/encnet/encnet_r50-d8_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_40k_cityscapes/encnet_r50-d8_512x1024_40k_cityscapes_20200621_220958-68638a47.pth +- Name: encnet_r101-d8_512x1024_40k_cityscapes + In Collection: EncNet + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 375.94 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 12.1 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.81 + mIoU(ms+flip): 77.21 + Config: configs/encnet/encnet_r101-d8_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_40k_cityscapes/encnet_r101-d8_512x1024_40k_cityscapes_20200621_220933-35e0a3e8.pth +- Name: encnet_r50-d8_769x769_40k_cityscapes + In Collection: EncNet + Metadata: + backbone: R-50-D8 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - value: 549.45 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + 
Training Memory (GB): 9.8 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.24 + mIoU(ms+flip): 77.85 + Config: configs/encnet/encnet_r50-d8_769x769_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_40k_cityscapes/encnet_r50-d8_769x769_40k_cityscapes_20200621_220958-3bcd2884.pth +- Name: encnet_r101-d8_769x769_40k_cityscapes + In Collection: EncNet + Metadata: + backbone: R-101-D8 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - value: 793.65 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 13.7 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.25 + mIoU(ms+flip): 76.25 + Config: configs/encnet/encnet_r101-d8_769x769_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_40k_cityscapes/encnet_r101-d8_769x769_40k_cityscapes_20200621_220933-2fafed55.pth +- Name: encnet_r50-d8_512x1024_80k_cityscapes + In Collection: EncNet + Metadata: + backbone: R-50-D8 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.94 + mIoU(ms+flip): 79.13 + Config: configs/encnet/encnet_r50-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x1024_80k_cityscapes/encnet_r50-d8_512x1024_80k_cityscapes_20200622_003554-fc5c5624.pth +- Name: encnet_r101-d8_512x1024_80k_cityscapes + In Collection: EncNet + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.55 + mIoU(ms+flip): 79.47 + Config: configs/encnet/encnet_r101-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x1024_80k_cityscapes/encnet_r101-d8_512x1024_80k_cityscapes_20200622_003555-1de64bec.pth +- Name: encnet_r50-d8_769x769_80k_cityscapes + In Collection: EncNet + Metadata: + backbone: R-50-D8 + crop size: (769,769) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.44 + mIoU(ms+flip): 78.72 + Config: configs/encnet/encnet_r50-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_769x769_80k_cityscapes/encnet_r50-d8_769x769_80k_cityscapes_20200622_003554-55096dcb.pth +- Name: encnet_r101-d8_769x769_80k_cityscapes + In Collection: EncNet + Metadata: + backbone: R-101-D8 + crop size: (769,769) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.1 + mIoU(ms+flip): 76.97 + Config: configs/encnet/encnet_r101-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_769x769_80k_cityscapes/encnet_r101-d8_769x769_80k_cityscapes_20200622_003555-470ef79d.pth +- Name: encnet_r50-d8_512x512_80k_ade20k + In Collection: EncNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 43.84 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 10.1 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 39.53 + mIoU(ms+flip): 41.17 + Config: configs/encnet/encnet_r50-d8_512x512_80k_ade20k.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_80k_ade20k/encnet_r50-d8_512x512_80k_ade20k_20200622_042412-44b46b04.pth +- Name: encnet_r101-d8_512x512_80k_ade20k + In Collection: EncNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 67.25 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 13.6 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.11 + mIoU(ms+flip): 43.61 + Config: configs/encnet/encnet_r101-d8_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_80k_ade20k/encnet_r101-d8_512x512_80k_ade20k_20200622_101128-dd35e237.pth +- Name: encnet_r50-d8_512x512_160k_ade20k + In Collection: EncNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 40.1 + mIoU(ms+flip): 41.71 + Config: configs/encnet/encnet_r50-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r50-d8_512x512_160k_ade20k/encnet_r50-d8_512x512_160k_ade20k_20200622_101059-b2db95e0.pth +- Name: encnet_r101-d8_512x512_160k_ade20k + In Collection: EncNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.61 + mIoU(ms+flip): 44.01 + Config: configs/encnet/encnet_r101-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/encnet/encnet_r101-d8_512x512_160k_ade20k/encnet_r101-d8_512x512_160k_ade20k_20200622_073348-7989641f.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r101-d8_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..f34373d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r101-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..0b0207b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r101-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..8fec6ba --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r101-d8_512x512_20k_voc12aug.py 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000..c264af9 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r101-d8_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000..8a6968e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r101-d8_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..9415100 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r101-d8_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000..d6ade67 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r101-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..55648c0 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './encnet_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r50-d8_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..4ea6ed0 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r50-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..d2feeef --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + 
'../_base_/models/encnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r50-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..2a5dc20 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r50-d8_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000..9cb7952 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r50-d8_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000..81f3cbf --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r50-d8_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..835375c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r50-d8_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000..d311e33 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git 
a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r50-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..7b535f3 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r50s-d8_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r50s-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..600b701 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/encnet/encnet_r50s-d8_512x512_80k_ade20k.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/encnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + backbone=dict(stem_channels=128), + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/erfnet/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/erfnet/README.md new file mode 100644 index 0000000..bcb61d3 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/erfnet/README.md @@ -0,0 +1,52 @@ +# ERFNet + +[ERFNet: Efficient Residual Factorized ConvNet for Real-time Semantic Segmentation](http://www.robesafe.uah.es/personal/eduardo.romera/pdfs/Romera17tits.pdf) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Semantic segmentation is a challenging task that addresses most of the perception needs of intelligent vehicles (IVs) in an unified way. Deep neural networks excel at this task, as they can be trained end-to-end to accurately classify multiple object categories in an image at pixel level. However, a good tradeoff between high quality and computational resources is yet not present in the state-of-the-art semantic segmentation approaches, limiting their application in real vehicles. In this paper, we propose a deep architecture that is able to run in real time while providing accurate semantic segmentation. The core of our architecture is a novel layer that uses residual connections and factorized convolutions in order to remain efficient while retaining remarkable accuracy. Our approach is able to run at over 83 FPS in a single Titan X, and 7 FPS in a Jetson TX1 (embedded device). A comprehensive set of experiments on the publicly available Cityscapes data set demonstrates that our system achieves an accuracy that is similar to the state of the art, while being orders of magnitude faster to compute than other architectures that achieve top precision. The resulting tradeoff makes our model an ideal approach for scene understanding in IV applications. The code is publicly available at: https://github.com/Eromera/erfnet. + + + +
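The "novel layer" at the core of ERFNet (non-bottleneck-1D) factorizes each 3x3 convolution of a residual block into a 3x1 and a 1x3 convolution, cutting parameters and compute while keeping the receptive field. A simplified sketch, with batch norm, dropout, and dilation omitted (the full block is in `mmseg/models/backbones/erfnet.py`):

```python
import torch
import torch.nn as nn

class FactorizedResidual(nn.Module):
    """Residual block with the 3x3 conv factorized into a 3x1 + 1x3 pair."""

    def __init__(self, channels: int):
        super().__init__()
        self.branch = nn.Sequential(
            nn.Conv2d(channels, channels, (3, 1), padding=(1, 0)),
            nn.ReLU(inplace=True),
            nn.Conv2d(channels, channels, (1, 3), padding=(0, 1)),
        )
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.relu(x + self.branch(x))
```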
+ +## Citation + +```bibtex +@article{romera2017erfnet, + title={Erfnet: Efficient residual factorized convnet for real-time semantic segmentation}, + author={Romera, Eduardo and Alvarez, Jos{\'e} M and Bergasa, Luis M and Arroyo, Roberto}, + journal={IEEE Transactions on Intelligent Transportation Systems}, + volume={19}, + number={1}, + pages={263--272}, + year={2017}, + publisher={IEEE} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------- | ---------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| ERFNet | ERFNet | 512x1024 | 160000 | 6.04 | 15.26 | 71.08 | 72.6 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes/erfnet_fcn_4x4_512x1024_160k_cityscapes_20211126_082056-03d333ed.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes/erfnet_fcn_4x4_512x1024_160k_cityscapes_20211126_082056.log.json) | + +Note: + +- The model is trained from scratch. + +- Last deconvolution layer in the [original paper](https://github.com/Eromera/erfnet_pytorch/blob/master/train/erfnet.py#L123) is replaced by a naive `FCNHead` decoder head and a bilinear upsampling layer, found more effective and efficient. 
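For reference, running the ERFNet config added in this patch with its published checkpoint would look roughly like the sketch below; it assumes an mmsegmentation v0.x install, that both files are available locally, and that `demo.png` is a placeholder for a real image:

```python
from mmseg.apis import inference_segmentor, init_segmentor

config = 'configs/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes.py'
checkpoint = 'erfnet_fcn_4x4_512x1024_160k_cityscapes_20211126_082056-03d333ed.pth'

# Build the model from the config and load the trained weights.
model = init_segmentor(config, checkpoint, device='cuda:0')

# Returns per-pixel Cityscapes class ids for the input image.
result = inference_segmentor(model, 'demo.png')
```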
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/erfnet/erfnet.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/erfnet/erfnet.yml new file mode 100644 index 0000000..e4c34f9 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/erfnet/erfnet.yml @@ -0,0 +1,37 @@ +Collections: +- Name: ERFNet + Metadata: + Training Data: + - Cityscapes + Paper: + URL: http://www.robesafe.uah.es/personal/eduardo.romera/pdfs/Romera17tits.pdf + Title: 'ERFNet: Efficient Residual Factorized ConvNet for Real-time Semantic Segmentation' + README: configs/erfnet/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/erfnet.py#L321 + Version: v0.20.0 + Converted From: + Code: https://github.com/Eromera/erfnet_pytorch +Models: +- Name: erfnet_fcn_4x4_512x1024_160k_cityscapes + In Collection: ERFNet + Metadata: + backbone: ERFNet + crop size: (512,1024) + lr schd: 160000 + inference time (ms/im): + - value: 65.53 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 6.04 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 71.08 + mIoU(ms+flip): 72.6 + Config: configs/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes/erfnet_fcn_4x4_512x1024_160k_cityscapes_20211126_082056-03d333ed.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes.py new file mode 100644 index 0000000..8cb8e51 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/erfnet/erfnet_fcn_4x4_512x1024_160k_cityscapes.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/erfnet_fcn.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, +) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/README.md new file mode 100644 index 0000000..c35ffe0 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/README.md @@ -0,0 +1,63 @@ +# FastFCN + +[FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation](https://arxiv.org/abs/1903.11816) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Modern approaches for semantic segmentation usually employ dilated convolutions in the backbone to extract high-resolution feature maps, which brings heavy computation complexity and memory footprint. To replace the time and memory consuming dilated convolutions, we propose a novel joint upsampling module named Joint Pyramid Upsampling (JPU) by formulating the task of extracting high-resolution feature maps into a joint upsampling problem. With the proposed JPU, our method reduces the computation complexity by more than three times without performance loss. Experiments show that JPU is superior to other upsampling modules, which can be plugged into many existing approaches to reduce computation complexity and improve performance. 
By replacing dilated convolutions with the proposed JPU module, our method achieves the state-of-the-art performance in Pascal Context dataset (mIoU of 53.13%) and ADE20K dataset (final score of 0.5584) while running 3 times faster. + + + +
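A minimal sketch of the joint-upsampling idea described above, assuming nothing beyond plain PyTorch: reduce each backbone level to a common width, upsample everything to the shallowest level's resolution, concatenate, and run parallel dilated convolutions over the fused map. This illustrates the mechanism only; mmsegmentation's actual `JPU` neck (used by the configs below) differs in detail.

```python
# Sketch of the Joint Pyramid Upsampling (JPU) idea: instead of running a
# dilated backbone at high resolution, fuse multi-level stride-32 backbone
# features and recover detail with parallel dilated convolutions.
import torch
import torch.nn as nn
import torch.nn.functional as F

class JointUpsampling(nn.Module):
    def __init__(self, in_channels=(512, 1024, 2048), mid=512,
                 dilations=(1, 2, 4, 8)):
        super().__init__()
        # reduce each level to a common channel width
        self.reduce = nn.ModuleList(
            nn.Conv2d(c, mid, 1) for c in in_channels)
        # parallel dilated convs over the fused map act as the joint filter
        self.dilated = nn.ModuleList(
            nn.Conv2d(mid * len(in_channels), mid, 3,
                      padding=d, dilation=d) for d in dilations)

    def forward(self, feats):
        size = feats[0].shape[-2:]  # resolution of the shallowest input
        fused = torch.cat(
            [F.interpolate(r(f), size, mode='bilinear', align_corners=False)
             for r, f in zip(self.reduce, feats)], dim=1)
        return torch.cat([d(fused) for d in self.dilated], dim=1)

feats = [torch.randn(1, 512, 64, 128),
         torch.randn(1, 1024, 32, 64),
         torch.randn(1, 2048, 16, 32)]
print(JointUpsampling()(feats).shape)  # torch.Size([1, 2048, 64, 128])
```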
+ +## Citation + +```bibtex +@article{wu2019fastfcn, +title={Fastfcn: Rethinking dilated convolution in the backbone for semantic segmentation}, +author={Wu, Huikai and Zhang, Junge and Huang, Kaiqi and Liang, Kongming and Yu, Yizhou}, +journal={arXiv preprint arXiv:1903.11816}, +year={2019} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------------------------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FastFCN + DeepLabV3 | R-50-D32 | 512x1024 | 80000 | 5.67 | 2.64 | 79.12 | 80.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes_20210928_053722-5d1a2648.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes_20210928_053722.log.json) | +| FastFCN + DeepLabV3 (4x4) | R-50-D32 | 512x1024 | 80000 | 9.79 | - | 79.52 | 80.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes_20210924_214357-72220849.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes_20210924_214357.log.json) | +| FastFCN + PSPNet | R-50-D32 | 512x1024 | 80000 | 5.67 | 4.40 | 79.26 | 80.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes_20210928_053722-57749bed.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes_20210928_053722.log.json) | +| FastFCN + PSPNet (4x4) | R-50-D32 | 512x1024 | 80000 | 9.94 | - | 78.76 | 80.03 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes_20210925_061841-77e87b0a.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes_20210925_061841.log.json) | +| FastFCN + EncNet | R-50-D32 | 512x1024 | 80000 | 8.15 | 4.77 | 77.97 | 79.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastfcn/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes_20210928_030036-78da5046.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes_20210928_030036.log.json) | +| FastFCN + EncNet (4x4) | R-50-D32 | 512x1024 | 80000 | 15.45 | - | 78.6 | 80.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes_20210926_093217-e1eb6dbb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes_20210926_093217.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------------------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------- | ---------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FastFCN + DeepLabV3 | R-50-D32 | 512x512 | 80000 | 8.46 | 12.06 | 41.88 | 42.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k_20211013_190619-3aa40f2d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k_20211013_190619.log.json) | +| FastFCN + DeepLabV3 | R-50-D32 | 512x512 | 160000 | - | - | 43.58 | 44.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k_20211008_152246-27036aee.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k_20211008_152246.log.json) | +| FastFCN + PSPNet | R-50-D32 | 512x512 | 80000 | 8.02 | 19.21 | 41.40 | 42.12 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k_20210930_225137-993d07c8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k_20210930_225137.log.json) | +| FastFCN + PSPNet | R-50-D32 | 512x512 | 160000 | - | - | 42.63 | 43.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k_20211008_105455-e8f5a2fd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k_20211008_105455.log.json) | +| FastFCN + EncNet | R-50-D32 | 512x512 | 80000 | 9.67 | 17.23 | 40.88 | 42.36 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k_20210930_225214-65aef6dd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k_20210930_225214.log.json) | +| FastFCN + EncNet | R-50-D32 | 512x512 | 160000 | - | - | 42.50 | 44.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k_20211008_105456-d875ce3c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k_20211008_105456.log.json) | + +Note: + +- `4x4` means 4 GPUs with 4 samples per GPU in training; the default setting is 4 GPUs with 2 samples per GPU (see the config-inspection sketch below). +- Results for the plain [DeepLabV3 (mIoU: 79.32)](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3), [PSPNet (mIoU: 78.55)](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet), and [ENCNet (mIoU: 77.94)](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet) baselines can be found in their respective config folders.
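Because these files use mmcv's `_base_` inheritance, the `4x4` variants contain only the overridden `data` block; everything else is merged in at load time. A sketch, assuming mmcv 1.x and the mmsegmentation repo root as the working directory:

```python
# Sketch: the `4x4` configs inherit everything via `_base_` and override
# only the sampler. Loading one with mmcv's config machinery shows the
# merged values. Assumes mmcv 1.x and the repo root as working directory.
from mmcv import Config

cfg = Config.fromfile(
    'configs/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes.py')

# Default FastFCN configs train with 2 samples per GPU; the 4x4 variant
# bumps both the batch size and the dataloader workers to 4.
print(cfg.data.samples_per_gpu)  # 4
print(cfg.data.workers_per_gpu)  # 4
print(cfg.model.neck.type)       # 'JPU', inherited from the _base_ model file
```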
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn.yml new file mode 100644 index 0000000..0131fcf --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn.yml @@ -0,0 +1,235 @@ +Collections: +- Name: FastFCN + Metadata: + Training Data: + - Cityscapes + - ADE20K + Paper: + URL: https://arxiv.org/abs/1903.11816 + Title: 'FastFCN: Rethinking Dilated Convolution in the Backbone for Semantic Segmentation' + README: configs/fastfcn/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/jpu.py#L12 + Version: v0.18.0 + Converted From: + Code: https://github.com/wuhuikai/FastFCN +Models: +- Name: fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes + In Collection: FastFCN + Metadata: + backbone: R-50-D32 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 378.79 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 5.67 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.12 + mIoU(ms+flip): 80.58 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes_20210928_053722-5d1a2648.pth +- Name: fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes + In Collection: FastFCN + Metadata: + backbone: R-50-D32 + crop size: (512,1024) + lr schd: 80000 + Training Memory (GB): 9.79 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.52 + mIoU(ms+flip): 80.91 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes_20210924_214357-72220849.pth +- Name: fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes + In Collection: FastFCN + Metadata: + backbone: R-50-D32 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 227.27 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 5.67 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.26 + mIoU(ms+flip): 80.86 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes_20210928_053722-57749bed.pth +- Name: fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes + In Collection: FastFCN + Metadata: + backbone: R-50-D32 + crop size: (512,1024) + lr schd: 80000 + Training Memory (GB): 9.94 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.76 + mIoU(ms+flip): 80.03 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes_20210925_061841-77e87b0a.pth +- Name: fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes + In Collection: FastFCN + Metadata: + backbone: R-50-D32 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - 
value: 209.64 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 8.15 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.97 + mIoU(ms+flip): 79.92 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes_20210928_030036-78da5046.pth +- Name: fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes + In Collection: FastFCN + Metadata: + backbone: R-50-D32 + crop size: (512,1024) + lr schd: 80000 + Training Memory (GB): 15.45 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.6 + mIoU(ms+flip): 80.25 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes_20210926_093217-e1eb6dbb.pth +- Name: fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k + In Collection: FastFCN + Metadata: + backbone: R-50-D32 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 82.92 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 8.46 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.88 + mIoU(ms+flip): 42.91 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k_20211013_190619-3aa40f2d.pth +- Name: fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k + In Collection: FastFCN + Metadata: + backbone: R-50-D32 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.58 + mIoU(ms+flip): 44.92 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k_20211008_152246-27036aee.pth +- Name: fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k + In Collection: FastFCN + Metadata: + backbone: R-50-D32 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 52.06 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 8.02 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.4 + mIoU(ms+flip): 42.12 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k_20210930_225137-993d07c8.pth +- Name: fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k + In Collection: FastFCN + Metadata: + backbone: R-50-D32 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.63 + mIoU(ms+flip): 43.71 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k_20211008_105455-e8f5a2fd.pth +- Name: 
fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k + In Collection: FastFCN + Metadata: + backbone: R-50-D32 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 58.04 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.67 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 40.88 + mIoU(ms+flip): 42.36 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k_20210930_225214-65aef6dd.pth +- Name: fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k + In Collection: FastFCN + Metadata: + backbone: R-50-D32 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.5 + mIoU(ms+flip): 44.21 + Config: configs/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k_20211008_105456-d875ce3c.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..87fc274 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_4x4_512x1024_80k_cityscapes.py @@ -0,0 +1,6 @@ +# model settings +_base_ = './fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes.py' +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, +) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..dc86da3 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_512x1024_80k_cityscapes.py @@ -0,0 +1,20 @@ +# model settings +_base_ = './fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + decode_head=dict( + _delete_=True, + type='ASPPHead', + in_channels=2048, + in_index=2, + channels=512, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k.py new file mode 100644 index 0000000..dbf9f80 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_160k_ade20k.py @@ -0,0 +1,20 @@ +# model settings +_base_ = './fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + decode_head=dict( + _delete_=True, + type='ASPPHead', + in_channels=2048, + in_index=2, + channels=512, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + 
align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k.py new file mode 100644 index 0000000..b14b1f6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_aspp_512x512_80k_ade20k.py @@ -0,0 +1,20 @@ +# model settings +_base_ = './fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + decode_head=dict( + _delete_=True, + type='ASPPHead', + in_channels=2048, + in_index=2, + channels=512, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..59d294b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_enc_4x4_512x1024_80k_cityscapes.py @@ -0,0 +1,6 @@ +# model settings +_base_ = './fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes.py' +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, +) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..cc68edf --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_enc_512x1024_80k_cityscapes.py @@ -0,0 +1,24 @@ +# model settings +_base_ = './fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + decode_head=dict( + _delete_=True, + type='EncHead', + in_channels=[512, 1024, 2048], + in_index=(0, 1, 2), + channels=512, + num_codes=32, + use_se_loss=True, + add_lateral=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_se_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k.py new file mode 100644 index 0000000..12f0add --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_160k_ade20k.py @@ -0,0 +1,24 @@ +# model settings +_base_ = './fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + decode_head=dict( + _delete_=True, + type='EncHead', + in_channels=[512, 1024, 2048], + in_index=(0, 1, 2), + 
channels=512, + num_codes=32, + use_se_loss=True, + add_lateral=False, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_se_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k.py new file mode 100644 index 0000000..d3e2e9c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_enc_512x512_80k_ade20k.py @@ -0,0 +1,24 @@ +# model settings +_base_ = './fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + decode_head=dict( + _delete_=True, + type='EncHead', + in_channels=[512, 1024, 2048], + in_index=(0, 1, 2), + channels=512, + num_codes=32, + use_se_loss=True, + add_lateral=False, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_se_decode=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.2)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..5fe5ca1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_psp_4x4_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/fastfcn_r50-d32_jpu_psp.py', + '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, +) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..e7637fa --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x1024_80k_cityscapes.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/fastfcn_r50-d32_jpu_psp.py', + '../_base_/datasets/cityscapes.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k.py new file mode 100644 index 0000000..e267ac6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_160k_ade20k.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/fastfcn_r50-d32_jpu_psp.py', + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git 
a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k.py new file mode 100644 index 0000000..22e0447 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastfcn/fastfcn_r50-d32_jpu_psp_512x512_80k_ade20k.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/fastfcn_r50-d32_jpu_psp.py', + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastscnn/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastscnn/README.md new file mode 100644 index 0000000..1565626 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastscnn/README.md @@ -0,0 +1,42 @@ +# Fast-SCNN + +[Fast-SCNN for Semantic Segmentation](https://arxiv.org/abs/1902.04502) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +The encoder-decoder framework is state-of-the-art for offline semantic image segmentation. Since the rise in autonomous systems, real-time computation is increasingly desirable. In this paper, we introduce fast segmentation convolutional neural network (Fast-SCNN), an above real-time semantic segmentation model on high resolution image data (1024x2048px) suited to efficient computation on embedded devices with low memory. Building on existing two-branch methods for fast segmentation, we introduce our \`learning to downsample' module which computes low-level features for multiple resolution branches simultaneously. Our network combines spatial detail at high resolution with deep features extracted at lower resolution, yielding an accuracy of 68.0% mean intersection over union at 123.5 frames per second on Cityscapes. We also show that large scale pre-training is unnecessary. We thoroughly validate our metric in experiments with ImageNet pre-training and the coarse labeled data of Cityscapes. Finally, we show even faster computation with competitive results on subsampled inputs, without any network modifications. + + + +
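The "learning to downsample" idea in the abstract, computing low-level features once and sharing them between a spatial-detail path and a deep context path, can be sketched in plain PyTorch. The sketch below is illustrative and far smaller than the real Fast-SCNN:

```python
# Sketch of Fast-SCNN's two-branch layout: a shared "learning to
# downsample" stem computes low-level features once; a deep branch keeps
# downsampling for context while a skip keeps the spatial detail, and the
# two are fused before the classifier.
import torch
import torch.nn as nn
import torch.nn.functional as F

def conv_bn_relu(cin, cout, stride):
    return nn.Sequential(
        nn.Conv2d(cin, cout, 3, stride=stride, padding=1, bias=False),
        nn.BatchNorm2d(cout), nn.ReLU(inplace=True))

class TwoBranchSketch(nn.Module):
    def __init__(self, num_classes=19):
        super().__init__()
        # shared stem: low-level features at 1/8 resolution
        self.downsample = nn.Sequential(
            conv_bn_relu(3, 32, 2), conv_bn_relu(32, 48, 2),
            conv_bn_relu(48, 64, 2))
        # deep branch: cheap global context at 1/32 resolution
        self.deep = nn.Sequential(
            conv_bn_relu(64, 96, 2), conv_bn_relu(96, 128, 2))
        self.fuse = nn.Conv2d(64 + 128, 128, 1)
        self.classifier = nn.Conv2d(128, num_classes, 1)

    def forward(self, x):
        low = self.downsample(x)                      # shared low-level features
        deep = F.interpolate(self.deep(low), low.shape[-2:],
                             mode='bilinear', align_corners=False)
        out = self.classifier(self.fuse(torch.cat([low, deep], 1)))
        return F.interpolate(out, x.shape[-2:], mode='bilinear',
                             align_corners=False)

print(TwoBranchSketch()(torch.randn(1, 3, 512, 1024)).shape)
# torch.Size([1, 19, 512, 1024])
```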
+ +## Citation + +```bibtex +@article{poudel2019fast, + title={Fast-scnn: Fast semantic segmentation network}, + author={Poudel, Rudra PK and Liwicki, Stephan and Cipolla, Roberto}, + journal={arXiv preprint arXiv:1902.04502}, + year={2019} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| -------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------- | --------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FastSCNN | FastSCNN | 512x1024 | 160000 | 3.3 | 56.45 | 70.96 | 72.65 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastscnn/fast_scnn_lr0.12_8x4_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fast_scnn/fast_scnn_lr0.12_8x4_160k_cityscapes/fast_scnn_lr0.12_8x4_160k_cityscapes_20210630_164853-0cec9937.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fast_scnn/fast_scnn_lr0.12_8x4_160k_cityscapes/fast_scnn_lr0.12_8x4_160k_cityscapes_20210630_164853.log.json) | diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastscnn/fast_scnn_lr0.12_8x4_160k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastscnn/fast_scnn_lr0.12_8x4_160k_cityscapes.py new file mode 100644 index 0000000..4698125 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastscnn/fast_scnn_lr0.12_8x4_160k_cityscapes.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/fast_scnn.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] + +# Re-config the data sampler. +data = dict(samples_per_gpu=4, workers_per_gpu=4) + +# Re-config the optimizer. 
+optimizer = dict(type='SGD', lr=0.12, momentum=0.9, weight_decay=4e-5) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastscnn/fastscnn.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastscnn/fastscnn.yml new file mode 100644 index 0000000..cad0360 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fastscnn/fastscnn.yml @@ -0,0 +1,35 @@ +Collections: +- Name: FastSCNN + Metadata: + Training Data: + - Cityscapes + Paper: + URL: https://arxiv.org/abs/1902.04502 + Title: Fast-SCNN for Semantic Segmentation + README: configs/fastscnn/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/fast_scnn.py#L272 + Version: v0.17.0 +Models: +- Name: fast_scnn_lr0.12_8x4_160k_cityscapes + In Collection: FastSCNN + Metadata: + backbone: FastSCNN + crop size: (512,1024) + lr schd: 160000 + inference time (ms/im): + - value: 17.71 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 3.3 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 70.96 + mIoU(ms+flip): 72.65 + Config: configs/fastscnn/fast_scnn_lr0.12_8x4_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fast_scnn/fast_scnn_lr0.12_8x4_160k_cityscapes/fast_scnn_lr0.12_8x4_160k_cityscapes_20210630_164853-0cec9937.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/README.md new file mode 100644 index 0000000..09ca1a5 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/README.md @@ -0,0 +1,111 @@ +# FCN + +[Fully Convolutional Networks for Semantic Segmentation](https://arxiv.org/abs/1411.4038) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Convolutional networks are powerful visual models that yield hierarchies of features. We show that convolutional networks by themselves, trained end-to-end, pixels-to-pixels, exceed the state-of-the-art in semantic segmentation. Our key insight is to build "fully convolutional" networks that take input of arbitrary size and produce correspondingly-sized output with efficient inference and learning. We define and detail the space of fully convolutional networks, explain their application to spatially dense prediction tasks, and draw connections to prior models. We adapt contemporary classification networks (AlexNet, the VGG net, and GoogLeNet) into fully convolutional networks and transfer their learned representations by fine-tuning to the segmentation task. We then define a novel architecture that combines semantic information from a deep, coarse layer with appearance information from a shallow, fine layer to produce accurate and detailed segmentations. Our fully convolutional network achieves state-of-the-art segmentation of PASCAL VOC (20% relative improvement to 62.2% mean IU on 2012), NYUDv2, and SIFT Flow, while inference takes one third of a second for a typical image. + + + +
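The abstract's central construction, replacing a classifier's fully connected layers with 1x1 convolutions so the network accepts arbitrary input sizes and emits dense per-pixel scores, fits in a short sketch. It uses torchvision's ResNet-50 as a stand-in backbone; it is not the `FCNHead` these configs build on.

```python
# Sketch of the FCN recipe from the abstract: take a classification
# backbone, drop its pooling/FC head, attach a 1x1 conv that maps features
# to per-class scores, and bilinearly upsample to the input resolution.
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models import resnet50

class MinimalFCN(nn.Module):
    def __init__(self, num_classes=21):
        super().__init__()
        backbone = resnet50(weights=None)
        # keep everything up to the last conv stage; drop avgpool + fc
        self.features = nn.Sequential(*list(backbone.children())[:-2])
        self.score = nn.Conv2d(2048, num_classes, kernel_size=1)

    def forward(self, x):
        scores = self.score(self.features(x))      # coarse per-class scores
        return F.interpolate(scores, size=x.shape[-2:],
                             mode='bilinear', align_corners=False)

out = MinimalFCN()(torch.randn(1, 3, 500, 375))  # arbitrary input size
print(out.shape)  # torch.Size([1, 21, 500, 375])
```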
+ +## Citation + +```bibtex +@article{shelhamer2017fully, + title={Fully convolutional networks for semantic segmentation}, + author={Shelhamer, Evan and Long, Jonathan and Darrell, Trevor}, + journal={IEEE transactions on pattern analysis and machine intelligence}, + volume={39}, + number={4}, + pages={640--651}, + year={2017}, + publisher={IEEE Trans Pattern Anal Mach Intell} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ---------- | ---------- | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | R-50-D8 | 512x1024 | 40000 | 5.7 | 4.17 | 72.25 | 73.36 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r50-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_40k_cityscapes/fcn_r50-d8_512x1024_40k_cityscapes_20200604_192608-efe53f0d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_40k_cityscapes/fcn_r50-d8_512x1024_40k_cityscapes_20200604_192608.log.json) | +| FCN | R-101-D8 | 512x1024 | 40000 | 9.2 | 2.66 | 75.45 | 76.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r101-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_40k_cityscapes/fcn_r101-d8_512x1024_40k_cityscapes_20200604_181852-a883d3a1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_40k_cityscapes/fcn_r101-d8_512x1024_40k_cityscapes_20200604_181852.log.json) | +| FCN | R-50-D8 | 769x769 | 40000 | 6.5 | 1.80 | 71.47 | 72.54 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r50-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_40k_cityscapes/fcn_r50-d8_769x769_40k_cityscapes_20200606_113104-977b5d02.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_40k_cityscapes/fcn_r50-d8_769x769_40k_cityscapes_20200606_113104.log.json) | +| FCN | R-101-D8 | 769x769 | 40000 | 10.4 | 1.19 | 73.93 | 75.14 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r101-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_40k_cityscapes/fcn_r101-d8_769x769_40k_cityscapes_20200606_113208-7d4ab69c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_40k_cityscapes/fcn_r101-d8_769x769_40k_cityscapes_20200606_113208.log.json) | +| FCN | R-18-D8 | 512x1024 | 80000 | 1.7 | 14.65 | 71.11 | 72.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r18-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_512x1024_80k_cityscapes/fcn_r18-d8_512x1024_80k_cityscapes_20201225_021327-6c50f8b4.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_512x1024_80k_cityscapes/fcn_r18-d8_512x1024_80k_cityscapes-20201225_021327.log.json) | +| FCN | R-50-D8 | 512x1024 | 80000 | - | | 73.61 | 74.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r50-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_80k_cityscapes/fcn_r50-d8_512x1024_80k_cityscapes_20200606_113019-03aa804d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_80k_cityscapes/fcn_r50-d8_512x1024_80k_cityscapes_20200606_113019.log.json) | +| FCN | R-101-D8 | 512x1024 | 80000 | - | - | 75.13 | 75.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_80k_cityscapes/fcn_r101-d8_512x1024_80k_cityscapes_20200606_113038-3fb937eb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_80k_cityscapes/fcn_r101-d8_512x1024_80k_cityscapes_20200606_113038.log.json) | +| FCN (FP16) | R-101-D8 | 512x1024 | 80000 | 5.37 | 8.64 | 76.80 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r101-d8_fp16_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_fp16_512x1024_80k_cityscapes/fcn_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230921-fb13e883.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_fp16_512x1024_80k_cityscapes/fcn_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230921.log.json) | +| FCN | R-18-D8 | 769x769 | 80000 | 1.9 | 6.40 | 70.80 | 73.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r18-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_769x769_80k_cityscapes/fcn_r18-d8_769x769_80k_cityscapes_20201225_021451-9739d1b8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_769x769_80k_cityscapes/fcn_r18-d8_769x769_80k_cityscapes-20201225_021451.log.json) | +| FCN | R-50-D8 | 769x769 | 80000 | - | - | 72.64 | 73.32 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r50-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_80k_cityscapes/fcn_r50-d8_769x769_80k_cityscapes_20200606_195749-f5caeabc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_80k_cityscapes/fcn_r50-d8_769x769_80k_cityscapes_20200606_195749.log.json) | +| FCN | R-101-D8 | 769x769 | 80000 | - | - | 75.52 | 76.61 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r101-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_80k_cityscapes/fcn_r101-d8_769x769_80k_cityscapes_20200606_214354-45cbac68.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_80k_cityscapes/fcn_r101-d8_769x769_80k_cityscapes_20200606_214354.log.json) | +| FCN | R-18b-D8 | 512x1024 | 80000 | 1.6 | 16.74 | 70.24 | 72.77 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r18b-d8_512x1024_80k_cityscapes.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_512x1024_80k_cityscapes/fcn_r18b-d8_512x1024_80k_cityscapes_20201225_230143-92c0f445.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_512x1024_80k_cityscapes/fcn_r18b-d8_512x1024_80k_cityscapes-20201225_230143.log.json) | +| FCN | R-50b-D8 | 512x1024 | 80000 | 5.6 | 4.20 | 75.65 | 77.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r50b-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_512x1024_80k_cityscapes/fcn_r50b-d8_512x1024_80k_cityscapes_20201225_094221-82957416.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_512x1024_80k_cityscapes/fcn_r50b-d8_512x1024_80k_cityscapes-20201225_094221.log.json) | +| FCN | R-101b-D8 | 512x1024 | 80000 | 9.1 | 2.73 | 77.37 | 78.77 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r101b-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_512x1024_80k_cityscapes/fcn_r101b-d8_512x1024_80k_cityscapes_20201226_160213-4543858f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_512x1024_80k_cityscapes/fcn_r101b-d8_512x1024_80k_cityscapes-20201226_160213.log.json) | +| FCN | R-18b-D8 | 769x769 | 80000 | 1.7 | 6.70 | 69.66 | 72.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r18b-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_769x769_80k_cityscapes/fcn_r18b-d8_769x769_80k_cityscapes_20201226_004430-32d504e5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_769x769_80k_cityscapes/fcn_r18b-d8_769x769_80k_cityscapes-20201226_004430.log.json) | +| FCN | R-50b-D8 | 769x769 | 80000 | 6.3 | 1.82 | 73.83 | 76.60 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r50b-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_769x769_80k_cityscapes/fcn_r50b-d8_769x769_80k_cityscapes_20201225_094223-94552d38.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_769x769_80k_cityscapes/fcn_r50b-d8_769x769_80k_cityscapes-20201225_094223.log.json) | +| FCN | R-101b-D8 | 769x769 | 80000 | 10.3 | 1.15 | 77.02 | 78.67 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r101b-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_769x769_80k_cityscapes/fcn_r101b-d8_769x769_80k_cityscapes_20201226_170012-82be37e2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_769x769_80k_cityscapes/fcn_r101b-d8_769x769_80k_cityscapes-20201226_170012.log.json) | +| FCN (D6) | R-50-D16 | 512x1024 | 40000 | 3.4 | 10.22 | 77.06 | 78.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_d6_r50-d16_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_40k_cityscapes/fcn_d6_r50-d16_512x1024_40k_cityscapes_20210305_130133-98d5d1bc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_40k_cityscapes/fcn_d6_r50-d16_512x1024_40k_cityscapes-20210305_130133.log.json) | +| FCN (D6) | R-50-D16 | 512x1024 | 80000 | - | 10.35 | 77.27 | 78.88 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_d6_r50-d16_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_80k_cityscapes/fcn_d6_r50-d16_512x1024_80k_cityscapes_20210306_115604-133c292f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_80k_cityscapes/fcn_d6_r50-d16_512x1024_80k_cityscapes-20210306_115604.log.json) | +| FCN (D6) | R-50-D16 | 769x769 | 40000 | 3.7 | 4.17 | 76.82 | 78.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_d6_r50-d16_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_40k_cityscapes/fcn_d6_r50-d16_769x769_40k_cityscapes_20210305_185744-1aab18ed.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_40k_cityscapes/fcn_d6_r50-d16_769x769_40k_cityscapes-20210305_185744.log.json) | +| FCN (D6) | R-50-D16 | 769x769 | 80000 | - | 4.15 | 77.04 | 78.40 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_d6_r50-d16_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_80k_cityscapes/fcn_d6_r50-d16_769x769_80k_cityscapes_20210305_200413-109d88eb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_80k_cityscapes/fcn_d6_r50-d16_769x769_80k_cityscapes-20210305_200413.log.json) | +| FCN (D6) | R-101-D16 | 512x1024 | 40000 | 4.5 | 8.04 | 77.36 | 79.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_d6_r101-d16_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_40k_cityscapes/fcn_d6_r101-d16_512x1024_40k_cityscapes_20210305_130337-9cf2b450.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_40k_cityscapes/fcn_d6_r101-d16_512x1024_40k_cityscapes-20210305_130337.log.json) | +| FCN (D6) | R-101-D16 | 512x1024 | 80000 | - | 8.26 | 78.46 | 80.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_d6_r101-d16_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_80k_cityscapes/fcn_d6_r101-d16_512x1024_80k_cityscapes_20210308_102747-cb336445.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_80k_cityscapes/fcn_d6_r101-d16_512x1024_80k_cityscapes-20210308_102747.log.json) | +| FCN (D6) | R-101-D16 | 769x769 | 40000 | 5.0 | 3.12 | 77.28 | 78.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_d6_r101-d16_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_40k_cityscapes/fcn_d6_r101-d16_769x769_40k_cityscapes_20210308_102453-60b114e9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_40k_cityscapes/fcn_d6_r101-d16_769x769_40k_cityscapes-20210308_102453.log.json) | +| FCN (D6) | R-101-D16 | 769x769 | 80000 | - | 3.21 | 78.06 | 79.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_d6_r101-d16_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_80k_cityscapes/fcn_d6_r101-d16_769x769_80k_cityscapes_20210306_120016-e33adc4f.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_80k_cityscapes/fcn_d6_r101-d16_769x769_80k_cityscapes-20210306_120016.log.json) | +| FCN (D6) | R-50b-D16 | 512x1024 | 80000 | 3.2 | 10.16 | 76.99 | 79.03 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_d6_r50b-d16_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b-d16_512x1024_80k_cityscapes/fcn_d6_r50b-d16_512x1024_80k_cityscapes_20210311_125550-6a0b62e9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b_d16_512x1024_80k_cityscapes/fcn_d6_r50b_d16_512x1024_80k_cityscapes-20210311_125550.log.json) | +| FCN (D6) | R-50b-D16 | 769x769 | 80000 | 3.6 | 4.17 | 76.86 | 78.52 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_d6_r50b-d16_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b-d16_769x769_80k_cityscapes/fcn_d6_r50b-d16_769x769_80k_cityscapes_20210311_131012-d665f231.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b_d16_769x769_80k_cityscapes/fcn_d6_r50b_d16_769x769_80k_cityscapes-20210311_131012.log.json) | +| FCN (D6) | R-101b-D16 | 512x1024 | 80000 | 4.3 | 8.46 | 77.72 | 79.53 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_d6_r101b-d16_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b-d16_512x1024_80k_cityscapes/fcn_d6_r101b-d16_512x1024_80k_cityscapes_20210311_144305-3f2eb5b4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b_d16_512x1024_80k_cityscapes/fcn_d6_r101b_d16_512x1024_80k_cityscapes-20210311_144305.log.json) | +| FCN (D6) | R-101b-D16 | 769x769 | 80000 | 4.8 | 3.32 | 77.34 | 78.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_d6_r101b-d16_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b-d16_769x769_80k_cityscapes/fcn_d6_r101b-d16_769x769_80k_cityscapes_20210311_154527-c4d8bfbc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b_d16_769x769_80k_cityscapes/fcn_d6_r101b_d16_769x769_80k_cityscapes-20210311_154527.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | R-50-D8 | 512x512 | 80000 | 8.5 | 23.49 | 35.94 | 37.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r50-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_80k_ade20k/fcn_r50-d8_512x512_80k_ade20k_20200614_144016-f8ac5082.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_80k_ade20k/fcn_r50-d8_512x512_80k_ade20k_20200614_144016.log.json) | +| FCN | R-101-D8 | 512x512 | 80000 | 12 | 14.78 | 39.61 | 40.83 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r101-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_80k_ade20k/fcn_r101-d8_512x512_80k_ade20k_20200615_014143-bc1809f7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_80k_ade20k/fcn_r101-d8_512x512_80k_ade20k_20200615_014143.log.json) | +| FCN | R-50-D8 | 512x512 | 160000 | - | - | 36.10 | 38.08 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r50-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_160k_ade20k/fcn_r50-d8_512x512_160k_ade20k_20200615_100713-4edbc3b4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_160k_ade20k/fcn_r50-d8_512x512_160k_ade20k_20200615_100713.log.json) | +| FCN | R-101-D8 | 512x512 | 160000 | - | - | 39.91 | 41.40 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_160k_ade20k/fcn_r101-d8_512x512_160k_ade20k_20200615_105816-fd192bd5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_160k_ade20k/fcn_r101-d8_512x512_160k_ade20k_20200615_105816.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | R-50-D8 | 512x512 | 20000 | 5.7 | 23.28 | 67.08 | 69.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r50-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_20k_voc12aug/fcn_r50-d8_512x512_20k_voc12aug_20200617_010715-52dc5306.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_20k_voc12aug/fcn_r50-d8_512x512_20k_voc12aug_20200617_010715.log.json) | +| FCN | R-101-D8 | 512x512 | 20000 | 9.2 | 14.81 | 71.16 | 73.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r101-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_20k_voc12aug/fcn_r101-d8_512x512_20k_voc12aug_20200617_010842-0bb4e798.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_20k_voc12aug/fcn_r101-d8_512x512_20k_voc12aug_20200617_010842.log.json) | +| FCN | R-50-D8 | 512x512 | 40000 | - | - | 66.97 | 69.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r50-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_40k_voc12aug/fcn_r50-d8_512x512_40k_voc12aug_20200613_161222-5e2dbf40.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_40k_voc12aug/fcn_r50-d8_512x512_40k_voc12aug_20200613_161222.log.json) | +| FCN | R-101-D8 | 512x512 | 40000 | - | - | 69.91 | 72.38 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r101-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_40k_voc12aug/fcn_r101-d8_512x512_40k_voc12aug_20200613_161240-4c8bcefd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_40k_voc12aug/fcn_r101-d8_512x512_40k_voc12aug_20200613_161240.log.json) | + +### Pascal Context + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | R-101-D8 | 480x480 | 40000 | - | 9.93 | 44.43 | 45.63 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r101-d8_480x480_40k_pascal_context.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context/fcn_r101-d8_480x480_40k_pascal_context_20210421_154757-b5e97937.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context/fcn_r101-d8_480x480_40k_pascal_context-20210421_154757.log.json) | +| FCN | R-101-D8 | 480x480 | 80000 | - | - | 44.13 | 45.26 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r101-d8_480x480_80k_pascal_context.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context/fcn_r101-d8_480x480_80k_pascal_context_20210421_163310-4711813f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context/fcn_r101-d8_480x480_80k_pascal_context-20210421_163310.log.json) | + +### Pascal Context 59 + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | R-101-D8 | 480x480 | 40000 | - | - | 48.42 | 50.4 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r101-d8_480x480_40k_pascal_context_59.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context_59/fcn_r101-d8_480x480_40k_pascal_context_59_20210415_230724-8cf83682.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context_59/fcn_r101-d8_480x480_40k_pascal_context_59-20210415_230724.log.json) | +| FCN | R-101-D8 | 480x480 | 80000 | - | - | 49.35 | 51.38 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn/fcn_r101-d8_480x480_80k_pascal_context_59.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context_59/fcn_r101-d8_480x480_80k_pascal_context_59_20210416_110804-9a6f2c94.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context_59/fcn_r101-d8_480x480_80k_pascal_context_59-20210416_110804.log.json) | + +Note: + +- `FP16` means Mixed Precision (FP16) is adopted in training. +- `FCN D6` means dilation rate of convolution operator in FCN is 6. diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn.yml new file mode 100644 index 0000000..563391c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn.yml @@ -0,0 +1,827 @@ +Collections: +- Name: FCN + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + - Pascal Context + - Pascal Context 59 + Paper: + URL: https://arxiv.org/abs/1411.4038 + Title: Fully Convolutional Networks for Semantic Segmentation + README: configs/fcn/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fcn_head.py#L11 + Version: v0.17.0 + Converted From: + Code: https://github.com/BVLC/caffe/wiki/Model-Zoo#fcn +Models: +- Name: fcn_r50-d8_512x1024_40k_cityscapes + In Collection: FCN + Metadata: + backbone: R-50-D8 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 239.81 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 5.7 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 72.25 + mIoU(ms+flip): 73.36 + Config: configs/fcn/fcn_r50-d8_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_40k_cityscapes/fcn_r50-d8_512x1024_40k_cityscapes_20200604_192608-efe53f0d.pth +- Name: fcn_r101-d8_512x1024_40k_cityscapes + In Collection: FCN + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 375.94 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 9.2 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.45 + mIoU(ms+flip): 76.58 + Config: configs/fcn/fcn_r101-d8_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_40k_cityscapes/fcn_r101-d8_512x1024_40k_cityscapes_20200604_181852-a883d3a1.pth +- Name: fcn_r50-d8_769x769_40k_cityscapes + In Collection: FCN + Metadata: + backbone: R-50-D8 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - value: 555.56 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 6.5 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 71.47 + mIoU(ms+flip): 72.54 + Config: configs/fcn/fcn_r50-d8_769x769_40k_cityscapes.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_40k_cityscapes/fcn_r50-d8_769x769_40k_cityscapes_20200606_113104-977b5d02.pth +- Name: fcn_r101-d8_769x769_40k_cityscapes + In Collection: FCN + Metadata: + backbone: R-101-D8 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - value: 840.34 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 10.4 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.93 + mIoU(ms+flip): 75.14 + Config: configs/fcn/fcn_r101-d8_769x769_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_40k_cityscapes/fcn_r101-d8_769x769_40k_cityscapes_20200606_113208-7d4ab69c.pth +- Name: fcn_r18-d8_512x1024_80k_cityscapes + In Collection: FCN + Metadata: + backbone: R-18-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 68.26 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 1.7 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 71.11 + mIoU(ms+flip): 72.91 + Config: configs/fcn/fcn_r18-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_512x1024_80k_cityscapes/fcn_r18-d8_512x1024_80k_cityscapes_20201225_021327-6c50f8b4.pth +- Name: fcn_r50-d8_512x1024_80k_cityscapes + In Collection: FCN + Metadata: + backbone: R-50-D8 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.61 + mIoU(ms+flip): 74.24 + Config: configs/fcn/fcn_r50-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x1024_80k_cityscapes/fcn_r50-d8_512x1024_80k_cityscapes_20200606_113019-03aa804d.pth +- Name: fcn_r101-d8_512x1024_80k_cityscapes + In Collection: FCN + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.13 + mIoU(ms+flip): 75.94 + Config: configs/fcn/fcn_r101-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x1024_80k_cityscapes/fcn_r101-d8_512x1024_80k_cityscapes_20200606_113038-3fb937eb.pth +- Name: fcn_r101-d8_fp16_512x1024_80k_cityscapes + In Collection: FCN + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 115.74 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP16 + resolution: (512,1024) + Training Memory (GB): 5.37 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.8 + Config: configs/fcn/fcn_r101-d8_fp16_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_fp16_512x1024_80k_cityscapes/fcn_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230921-fb13e883.pth +- Name: fcn_r18-d8_769x769_80k_cityscapes + In Collection: FCN + Metadata: + backbone: R-18-D8 + crop size: (769,769) + lr schd: 80000 + inference time (ms/im): + - value: 156.25 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 1.9 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 70.8 + mIoU(ms+flip): 73.16 + Config: configs/fcn/fcn_r18-d8_769x769_80k_cityscapes.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18-d8_769x769_80k_cityscapes/fcn_r18-d8_769x769_80k_cityscapes_20201225_021451-9739d1b8.pth +- Name: fcn_r50-d8_769x769_80k_cityscapes + In Collection: FCN + Metadata: + backbone: R-50-D8 + crop size: (769,769) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 72.64 + mIoU(ms+flip): 73.32 + Config: configs/fcn/fcn_r50-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_769x769_80k_cityscapes/fcn_r50-d8_769x769_80k_cityscapes_20200606_195749-f5caeabc.pth +- Name: fcn_r101-d8_769x769_80k_cityscapes + In Collection: FCN + Metadata: + backbone: R-101-D8 + crop size: (769,769) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.52 + mIoU(ms+flip): 76.61 + Config: configs/fcn/fcn_r101-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_769x769_80k_cityscapes/fcn_r101-d8_769x769_80k_cityscapes_20200606_214354-45cbac68.pth +- Name: fcn_r18b-d8_512x1024_80k_cityscapes + In Collection: FCN + Metadata: + backbone: R-18b-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 59.74 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 1.6 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 70.24 + mIoU(ms+flip): 72.77 + Config: configs/fcn/fcn_r18b-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_512x1024_80k_cityscapes/fcn_r18b-d8_512x1024_80k_cityscapes_20201225_230143-92c0f445.pth +- Name: fcn_r50b-d8_512x1024_80k_cityscapes + In Collection: FCN + Metadata: + backbone: R-50b-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 238.1 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 5.6 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.65 + mIoU(ms+flip): 77.59 + Config: configs/fcn/fcn_r50b-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_512x1024_80k_cityscapes/fcn_r50b-d8_512x1024_80k_cityscapes_20201225_094221-82957416.pth +- Name: fcn_r101b-d8_512x1024_80k_cityscapes + In Collection: FCN + Metadata: + backbone: R-101b-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 366.3 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 9.1 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.37 + mIoU(ms+flip): 78.77 + Config: configs/fcn/fcn_r101b-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_512x1024_80k_cityscapes/fcn_r101b-d8_512x1024_80k_cityscapes_20201226_160213-4543858f.pth +- Name: fcn_r18b-d8_769x769_80k_cityscapes + In Collection: FCN + Metadata: + backbone: R-18b-D8 + crop size: (769,769) + lr schd: 80000 + inference time (ms/im): + - value: 149.25 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 1.7 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 69.66 + mIoU(ms+flip): 72.07 + Config: configs/fcn/fcn_r18b-d8_769x769_80k_cityscapes.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r18b-d8_769x769_80k_cityscapes/fcn_r18b-d8_769x769_80k_cityscapes_20201226_004430-32d504e5.pth +- Name: fcn_r50b-d8_769x769_80k_cityscapes + In Collection: FCN + Metadata: + backbone: R-50b-D8 + crop size: (769,769) + lr schd: 80000 + inference time (ms/im): + - value: 549.45 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 6.3 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.83 + mIoU(ms+flip): 76.6 + Config: configs/fcn/fcn_r50b-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50b-d8_769x769_80k_cityscapes/fcn_r50b-d8_769x769_80k_cityscapes_20201225_094223-94552d38.pth +- Name: fcn_r101b-d8_769x769_80k_cityscapes + In Collection: FCN + Metadata: + backbone: R-101b-D8 + crop size: (769,769) + lr schd: 80000 + inference time (ms/im): + - value: 869.57 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 10.3 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.02 + mIoU(ms+flip): 78.67 + Config: configs/fcn/fcn_r101b-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101b-d8_769x769_80k_cityscapes/fcn_r101b-d8_769x769_80k_cityscapes_20201226_170012-82be37e2.pth +- Name: fcn_d6_r50-d16_512x1024_40k_cityscapes + In Collection: FCN + Metadata: + backbone: R-50-D16 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 97.85 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 3.4 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.06 + mIoU(ms+flip): 78.85 + Config: configs/fcn/fcn_d6_r50-d16_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_40k_cityscapes/fcn_d6_r50-d16_512x1024_40k_cityscapes_20210305_130133-98d5d1bc.pth +- Name: fcn_d6_r50-d16_512x1024_80k_cityscapes + In Collection: FCN + Metadata: + backbone: R-50-D16 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 96.62 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.27 + mIoU(ms+flip): 78.88 + Config: configs/fcn/fcn_d6_r50-d16_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_512x1024_80k_cityscapes/fcn_d6_r50-d16_512x1024_80k_cityscapes_20210306_115604-133c292f.pth +- Name: fcn_d6_r50-d16_769x769_40k_cityscapes + In Collection: FCN + Metadata: + backbone: R-50-D16 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - value: 239.81 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 3.7 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.82 + mIoU(ms+flip): 78.22 + Config: configs/fcn/fcn_d6_r50-d16_769x769_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_40k_cityscapes/fcn_d6_r50-d16_769x769_40k_cityscapes_20210305_185744-1aab18ed.pth +- Name: fcn_d6_r50-d16_769x769_80k_cityscapes + In Collection: FCN + Metadata: + backbone: R-50-D16 + crop size: (769,769) + lr schd: 80000 + inference time (ms/im): + - value: 240.96 + 
hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.04 + mIoU(ms+flip): 78.4 + Config: configs/fcn/fcn_d6_r50-d16_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50-d16_769x769_80k_cityscapes/fcn_d6_r50-d16_769x769_80k_cityscapes_20210305_200413-109d88eb.pth +- Name: fcn_d6_r101-d16_512x1024_40k_cityscapes + In Collection: FCN + Metadata: + backbone: R-101-D16 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 124.38 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 4.5 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.36 + mIoU(ms+flip): 79.18 + Config: configs/fcn/fcn_d6_r101-d16_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_40k_cityscapes/fcn_d6_r101-d16_512x1024_40k_cityscapes_20210305_130337-9cf2b450.pth +- Name: fcn_d6_r101-d16_512x1024_80k_cityscapes + In Collection: FCN + Metadata: + backbone: R-101-D16 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 121.07 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.46 + mIoU(ms+flip): 80.42 + Config: configs/fcn/fcn_d6_r101-d16_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_512x1024_80k_cityscapes/fcn_d6_r101-d16_512x1024_80k_cityscapes_20210308_102747-cb336445.pth +- Name: fcn_d6_r101-d16_769x769_40k_cityscapes + In Collection: FCN + Metadata: + backbone: R-101-D16 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - value: 320.51 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 5.0 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.28 + mIoU(ms+flip): 78.95 + Config: configs/fcn/fcn_d6_r101-d16_769x769_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_40k_cityscapes/fcn_d6_r101-d16_769x769_40k_cityscapes_20210308_102453-60b114e9.pth +- Name: fcn_d6_r101-d16_769x769_80k_cityscapes + In Collection: FCN + Metadata: + backbone: R-101-D16 + crop size: (769,769) + lr schd: 80000 + inference time (ms/im): + - value: 311.53 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.06 + mIoU(ms+flip): 79.58 + Config: configs/fcn/fcn_d6_r101-d16_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101-d16_769x769_80k_cityscapes/fcn_d6_r101-d16_769x769_80k_cityscapes_20210306_120016-e33adc4f.pth +- Name: fcn_d6_r50b-d16_512x1024_80k_cityscapes + In Collection: FCN + Metadata: + backbone: R-50b-D16 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 98.43 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 3.2 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.99 + mIoU(ms+flip): 79.03 + Config: configs/fcn/fcn_d6_r50b-d16_512x1024_80k_cityscapes.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b-d16_512x1024_80k_cityscapes/fcn_d6_r50b-d16_512x1024_80k_cityscapes_20210311_125550-6a0b62e9.pth +- Name: fcn_d6_r50b-d16_769x769_80k_cityscapes + In Collection: FCN + Metadata: + backbone: R-50b-D16 + crop size: (769,769) + lr schd: 80000 + inference time (ms/im): + - value: 239.81 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 3.6 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.86 + mIoU(ms+flip): 78.52 + Config: configs/fcn/fcn_d6_r50b-d16_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r50b-d16_769x769_80k_cityscapes/fcn_d6_r50b-d16_769x769_80k_cityscapes_20210311_131012-d665f231.pth +- Name: fcn_d6_r101b-d16_512x1024_80k_cityscapes + In Collection: FCN + Metadata: + backbone: R-101b-D16 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 118.2 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 4.3 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.72 + mIoU(ms+flip): 79.53 + Config: configs/fcn/fcn_d6_r101b-d16_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b-d16_512x1024_80k_cityscapes/fcn_d6_r101b-d16_512x1024_80k_cityscapes_20210311_144305-3f2eb5b4.pth +- Name: fcn_d6_r101b-d16_769x769_80k_cityscapes + In Collection: FCN + Metadata: + backbone: R-101b-D16 + crop size: (769,769) + lr schd: 80000 + inference time (ms/im): + - value: 301.2 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 4.8 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.34 + mIoU(ms+flip): 78.91 + Config: configs/fcn/fcn_d6_r101b-d16_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_d6_r101b-d16_769x769_80k_cityscapes/fcn_d6_r101b-d16_769x769_80k_cityscapes_20210311_154527-c4d8bfbc.pth +- Name: fcn_r50-d8_512x512_80k_ade20k + In Collection: FCN + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 42.57 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 8.5 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 35.94 + mIoU(ms+flip): 37.94 + Config: configs/fcn/fcn_r50-d8_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_80k_ade20k/fcn_r50-d8_512x512_80k_ade20k_20200614_144016-f8ac5082.pth +- Name: fcn_r101-d8_512x512_80k_ade20k + In Collection: FCN + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 67.66 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 12.0 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 39.61 + mIoU(ms+flip): 40.83 + Config: configs/fcn/fcn_r101-d8_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_80k_ade20k/fcn_r101-d8_512x512_80k_ade20k_20200615_014143-bc1809f7.pth +- Name: fcn_r50-d8_512x512_160k_ade20k + In Collection: FCN + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + 
Dataset: ADE20K + Metrics: + mIoU: 36.1 + mIoU(ms+flip): 38.08 + Config: configs/fcn/fcn_r50-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_160k_ade20k/fcn_r50-d8_512x512_160k_ade20k_20200615_100713-4edbc3b4.pth +- Name: fcn_r101-d8_512x512_160k_ade20k + In Collection: FCN + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 39.91 + mIoU(ms+flip): 41.4 + Config: configs/fcn/fcn_r101-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_160k_ade20k/fcn_r101-d8_512x512_160k_ade20k_20200615_105816-fd192bd5.pth +- Name: fcn_r50-d8_512x512_20k_voc12aug + In Collection: FCN + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 42.96 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 5.7 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 67.08 + mIoU(ms+flip): 69.94 + Config: configs/fcn/fcn_r50-d8_512x512_20k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_20k_voc12aug/fcn_r50-d8_512x512_20k_voc12aug_20200617_010715-52dc5306.pth +- Name: fcn_r101-d8_512x512_20k_voc12aug + In Collection: FCN + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 67.52 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.2 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 71.16 + mIoU(ms+flip): 73.57 + Config: configs/fcn/fcn_r101-d8_512x512_20k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_20k_voc12aug/fcn_r101-d8_512x512_20k_voc12aug_20200617_010842-0bb4e798.pth +- Name: fcn_r50-d8_512x512_40k_voc12aug + In Collection: FCN + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 66.97 + mIoU(ms+flip): 69.04 + Config: configs/fcn/fcn_r50-d8_512x512_40k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r50-d8_512x512_40k_voc12aug/fcn_r50-d8_512x512_40k_voc12aug_20200613_161222-5e2dbf40.pth +- Name: fcn_r101-d8_512x512_40k_voc12aug + In Collection: FCN + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 69.91 + mIoU(ms+flip): 72.38 + Config: configs/fcn/fcn_r101-d8_512x512_40k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_512x512_40k_voc12aug/fcn_r101-d8_512x512_40k_voc12aug_20200613_161240-4c8bcefd.pth +- Name: fcn_r101-d8_480x480_40k_pascal_context + In Collection: FCN + Metadata: + backbone: R-101-D8 + crop size: (480,480) + lr schd: 40000 + inference time (ms/im): + - value: 100.7 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (480,480) + Results: + - Task: Semantic Segmentation + Dataset: Pascal Context + Metrics: + mIoU: 44.43 + mIoU(ms+flip): 45.63 + Config: configs/fcn/fcn_r101-d8_480x480_40k_pascal_context.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context/fcn_r101-d8_480x480_40k_pascal_context_20210421_154757-b5e97937.pth +- Name: fcn_r101-d8_480x480_80k_pascal_context + In Collection: FCN + Metadata: + backbone: R-101-D8 + crop size: (480,480) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal Context + Metrics: + mIoU: 44.13 + mIoU(ms+flip): 45.26 + Config: configs/fcn/fcn_r101-d8_480x480_80k_pascal_context.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context/fcn_r101-d8_480x480_80k_pascal_context_20210421_163310-4711813f.pth +- Name: fcn_r101-d8_480x480_40k_pascal_context_59 + In Collection: FCN + Metadata: + backbone: R-101-D8 + crop size: (480,480) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal Context 59 + Metrics: + mIoU: 48.42 + mIoU(ms+flip): 50.4 + Config: configs/fcn/fcn_r101-d8_480x480_40k_pascal_context_59.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_40k_pascal_context_59/fcn_r101-d8_480x480_40k_pascal_context_59_20210415_230724-8cf83682.pth +- Name: fcn_r101-d8_480x480_80k_pascal_context_59 + In Collection: FCN + Metadata: + backbone: R-101-D8 + crop size: (480,480) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal Context 59 + Metrics: + mIoU: 49.35 + mIoU(ms+flip): 51.38 + Config: configs/fcn/fcn_r101-d8_480x480_80k_pascal_context_59.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/fcn/fcn_r101-d8_480x480_80k_pascal_context_59/fcn_r101-d8_480x480_80k_pascal_context_59_20210416_110804-9a6f2c94.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r101-d16_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r101-d16_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..aec4254 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r101-d16_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './fcn_d6_r50-d16_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r101-d16_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r101-d16_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..d0bafc5 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r101-d16_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './fcn_d6_r50-d16_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r101-d16_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r101-d16_769x769_40k_cityscapes.py new file mode 100644 index 0000000..29a9f98 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r101-d16_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './fcn_d6_r50-d16_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r101-d16_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r101-d16_769x769_80k_cityscapes.py new file mode 100644 index 0000000..1f21c65 --- 
/dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r101-d16_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './fcn_d6_r50-d16_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r101b-d16_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r101b-d16_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..af3f765 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r101b-d16_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = './fcn_d6_r50b-d16_512x1024_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r101b-d16_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r101b-d16_769x769_80k_cityscapes.py new file mode 100644 index 0000000..e3d4d88 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r101b-d16_769x769_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = './fcn_d6_r50b-d16_769x769_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r50-d16_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r50-d16_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..f30646e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r50-d16_512x1024_40k_cityscapes.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict( + backbone=dict(dilations=(1, 1, 1, 2), strides=(1, 2, 2, 1)), + decode_head=dict(dilation=6), + auxiliary_head=dict(dilation=6)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r50-d16_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r50-d16_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..e4b623a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r50-d16_512x1024_80k_cityscapes.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + backbone=dict(dilations=(1, 1, 1, 2), strides=(1, 2, 2, 1)), + decode_head=dict(dilation=6), + auxiliary_head=dict(dilation=6)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r50-d16_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r50-d16_769x769_40k_cityscapes.py new file mode 100644 index 0000000..01d8f27 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r50-d16_769x769_40k_cityscapes.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + backbone=dict(dilations=(1, 1, 1, 2), strides=(1, 2, 2, 1)), + decode_head=dict(align_corners=True, dilation=6), + auxiliary_head=dict(align_corners=True, dilation=6), + 
test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r50-d16_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r50-d16_769x769_80k_cityscapes.py new file mode 100644 index 0000000..c5ef3b8 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r50-d16_769x769_80k_cityscapes.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + backbone=dict(dilations=(1, 1, 1, 2), strides=(1, 2, 2, 1)), + decode_head=dict(align_corners=True, dilation=6), + auxiliary_head=dict(align_corners=True, dilation=6), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r50b-d16_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r50b-d16_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..0749ff1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r50b-d16_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './fcn_d6_r50-d16_512x1024_80k_cityscapes.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r50b-d16_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r50b-d16_769x769_80k_cityscapes.py new file mode 100644 index 0000000..fba8948 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_d6_r50b-d16_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './fcn_d6_r50-d16_769x769_80k_cityscapes.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_480x480_40k_pascal_context.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_480x480_40k_pascal_context.py new file mode 100644 index 0000000..f3a15b4 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_480x480_40k_pascal_context.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_480x480_40k_pascal_context.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_480x480_40k_pascal_context_59.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_480x480_40k_pascal_context_59.py new file mode 100644 index 0000000..908f4bf --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_480x480_40k_pascal_context_59.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_480x480_40k_pascal_context_59.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_480x480_80k_pascal_context.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000..bdccfd9 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_480x480_80k_pascal_context.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_480x480_80k_pascal_context.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', 
backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_480x480_80k_pascal_context_59.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_480x480_80k_pascal_context_59.py new file mode 100644 index 0000000..09cb612 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_480x480_80k_pascal_context_59.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_480x480_80k_pascal_context_59.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..7918dd1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..528110d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..1bf6780 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000..09a5fe5 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000..eafefaa --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..6d02945 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = 
'./fcn_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000..6b4cc57 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..3503c76 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_fp16_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_fp16_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..c6739d9 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101-d8_fp16_512x1024_80k_cityscapes.py @@ -0,0 +1,5 @@ +_base_ = './fcn_r101-d8_512x1024_80k_cityscapes.py' +# fp16 settings +optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.) +# fp16 placeholder +fp16 = dict() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101b-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..1b9bf60 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = './fcn_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101b-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..f36eb02 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r101b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = './fcn_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r18-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r18-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..5a1d29e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r18-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './fcn_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r18-d8_769x769_80k_cityscapes.py 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r18-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..6644a58 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r18-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './fcn_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r18b-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r18b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..92accfc --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r18b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './fcn_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r18b-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r18b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..5dd34dd --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r18b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './fcn_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_480x480_40k_pascal_context.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_480x480_40k_pascal_context.py new file mode 100644 index 0000000..7c57a6f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_480x480_40k_pascal_context.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/pascal_context.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=60), + auxiliary_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_480x480_40k_pascal_context_59.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_480x480_40k_pascal_context_59.py new file mode 100644 index 0000000..4a81800 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_480x480_40k_pascal_context_59.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', + '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=59), + auxiliary_head=dict(num_classes=59), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_480x480_80k_pascal_context.py 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000..df6d25b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_480x480_80k_pascal_context.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/pascal_context.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=60), + auxiliary_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_480x480_80k_pascal_context_59.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_480x480_80k_pascal_context_59.py new file mode 100644 index 0000000..02507cc --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_480x480_80k_pascal_context_59.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', + '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=59), + auxiliary_head=dict(num_classes=59), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..401c6ea --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..990a085 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..9ca7fd2 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000..17206a5 --- /dev/null +++ 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/pascal_voc12_aug.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000..8cec429 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/pascal_voc12_aug.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..ef194cb --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000..fca98c1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..7d75cd9 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/fcn_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50b-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..28ef13f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='torchvision://resnet50', 
backbone=dict(type='ResNet')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50b-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..106f7b6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/fcn/fcn_r50b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './fcn_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/README.md new file mode 100644 index 0000000..9a4cf7a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/README.md @@ -0,0 +1,68 @@ +# GCNet + +[GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond](https://arxiv.org/abs/1904.11492) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +The Non-Local Network (NLNet) presents a pioneering approach for capturing long-range dependencies, via aggregating query-specific global context to each query position. However, through a rigorous empirical analysis, we have found that the global contexts modeled by non-local network are almost the same for different query positions within an image. In this paper, we take advantage of this finding to create a simplified network based on a query-independent formulation, which maintains the accuracy of NLNet but with significantly less computation. We further observe that this simplified design shares similar structure with Squeeze-Excitation Network (SENet). Hence we unify them into a three-step general framework for global context modeling. Within the general framework, we design a better instantiation, called the global context (GC) block, which is lightweight and can effectively model the global context. The lightweight property allows us to apply it for multiple layers in a backbone network to construct a global context network (GCNet), which generally outperforms both simplified NLNet and SENet on major benchmarks for various recognition tasks. The code and configurations are released at [this https URL](https://github.com/xvjiarui/GCNet). + + + +
+<!-- [IMAGE] -->
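Each row of the results tables below pairs a config with a released checkpoint. A hedged usage sketch for trying one, assuming the mmsegmentation v0.x Python API (`init_segmentor`/`inference_segmentor`) that matches the v0.17.0 code referenced in this README; the config path and checkpoint filename are copied from the Cityscapes table, while `demo.png` is a placeholder input:

```python
# Assumes mmsegmentation v0.x is installed, the checkpoint has been
# downloaded locally, and the working directory is the mmsegmentation root.
from mmseg.apis import inference_segmentor, init_segmentor

config = 'configs/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes.py'
checkpoint = 'gcnet_r50-d8_512x1024_40k_cityscapes_20200618_074436-4b0fd17b.pth'

model = init_segmentor(config, checkpoint, device='cuda:0')
result = inference_segmentor(model, 'demo.png')  # list with one (H, W) label map
print(result[0].shape)
```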
+ +## Citation + +```bibtex +@inproceedings{cao2019gcnet, + title={Gcnet: Non-local networks meet squeeze-excitation networks and beyond}, + author={Cao, Yue and Xu, Jiarui and Lin, Stephen and Wei, Fangyun and Hu, Han}, + booktitle={Proceedings of the IEEE International Conference on Computer Vision Workshops}, + pages={0--0}, + year={2019} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| GCNet | R-50-D8 | 512x1024 | 40000 | 5.8 | 3.93 | 77.69 | 78.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes/gcnet_r50-d8_512x1024_40k_cityscapes_20200618_074436-4b0fd17b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes/gcnet_r50-d8_512x1024_40k_cityscapes_20200618_074436.log.json) | +| GCNet | R-101-D8 | 512x1024 | 40000 | 9.2 | 2.61 | 78.28 | 79.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes/gcnet_r101-d8_512x1024_40k_cityscapes_20200618_074436-5e62567f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes/gcnet_r101-d8_512x1024_40k_cityscapes_20200618_074436.log.json) | +| GCNet | R-50-D8 | 769x769 | 40000 | 6.5 | 1.67 | 78.12 | 80.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r50-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_40k_cityscapes/gcnet_r50-d8_769x769_40k_cityscapes_20200618_182814-a26f4471.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_40k_cityscapes/gcnet_r50-d8_769x769_40k_cityscapes_20200618_182814.log.json) | +| GCNet | R-101-D8 | 769x769 | 40000 | 10.5 | 1.13 | 78.95 | 80.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r101-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_40k_cityscapes/gcnet_r101-d8_769x769_40k_cityscapes_20200619_092550-ca4f0a84.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_40k_cityscapes/gcnet_r101-d8_769x769_40k_cityscapes_20200619_092550.log.json) | +| GCNet | R-50-D8 | 512x1024 | 80000 | - | - | 78.48 | 80.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes/gcnet_r50-d8_512x1024_80k_cityscapes_20200618_074450-ef8f069b.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes/gcnet_r50-d8_512x1024_80k_cityscapes_20200618_074450.log.json) | +| GCNet | R-101-D8 | 512x1024 | 80000 | - | - | 79.03 | 79.84 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes/gcnet_r101-d8_512x1024_80k_cityscapes_20200618_074450-778ebf69.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes/gcnet_r101-d8_512x1024_80k_cityscapes_20200618_074450.log.json) | +| GCNet | R-50-D8 | 769x769 | 80000 | - | - | 78.68 | 80.66 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r50-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_80k_cityscapes/gcnet_r50-d8_769x769_80k_cityscapes_20200619_092516-4839565b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_80k_cityscapes/gcnet_r50-d8_769x769_80k_cityscapes_20200619_092516.log.json) | +| GCNet | R-101-D8 | 769x769 | 80000 | - | - | 79.18 | 80.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r101-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_80k_cityscapes/gcnet_r101-d8_769x769_80k_cityscapes_20200619_092628-8e043423.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_80k_cityscapes/gcnet_r101-d8_769x769_80k_cityscapes_20200619_092628.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| GCNet | R-50-D8 | 512x512 | 80000 | 8.5 | 23.38 | 41.47 | 42.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r50-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_80k_ade20k/gcnet_r50-d8_512x512_80k_ade20k_20200614_185146-91a6da41.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_80k_ade20k/gcnet_r50-d8_512x512_80k_ade20k_20200614_185146.log.json) | +| GCNet | R-101-D8 | 512x512 | 80000 | 12 | 15.20 | 42.82 | 44.54 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r101-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_80k_ade20k/gcnet_r101-d8_512x512_80k_ade20k_20200615_020811-c3fcb6dd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_80k_ade20k/gcnet_r101-d8_512x512_80k_ade20k_20200615_020811.log.json) | +| GCNet | R-50-D8 | 512x512 | 160000 | - | - | 42.37 | 43.52 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r50-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_160k_ade20k/gcnet_r50-d8_512x512_160k_ade20k_20200615_224122-d95f3e1f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_160k_ade20k/gcnet_r50-d8_512x512_160k_ade20k_20200615_224122.log.json) | +| GCNet | R-101-D8 | 512x512 | 160000 | - | - | 43.69 | 45.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_160k_ade20k/gcnet_r101-d8_512x512_160k_ade20k_20200615_225406-615528d7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_160k_ade20k/gcnet_r101-d8_512x512_160k_ade20k_20200615_225406.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| GCNet | R-50-D8 | 512x512 | 20000 | 5.8 | 23.35 | 76.42 | 77.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r50-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_20k_voc12aug/gcnet_r50-d8_512x512_20k_voc12aug_20200617_165701-3cbfdab1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_20k_voc12aug/gcnet_r50-d8_512x512_20k_voc12aug_20200617_165701.log.json) | +| GCNet | R-101-D8 | 512x512 | 20000 | 9.2 | 14.80 | 77.41 | 78.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r101-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_20k_voc12aug/gcnet_r101-d8_512x512_20k_voc12aug_20200617_165713-6c720aa9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_20k_voc12aug/gcnet_r101-d8_512x512_20k_voc12aug_20200617_165713.log.json) | +| GCNet | R-50-D8 | 512x512 | 40000 | - | - | 76.24 | 77.63 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r50-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_40k_voc12aug/gcnet_r50-d8_512x512_40k_voc12aug_20200613_195105-9797336d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_40k_voc12aug/gcnet_r50-d8_512x512_40k_voc12aug_20200613_195105.log.json) | +| GCNet | R-101-D8 | 512x512 | 40000 | - | - | 77.84 | 78.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet/gcnet_r101-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_40k_voc12aug/gcnet_r101-d8_512x512_40k_voc12aug_20200613_185806-1e38208d.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_40k_voc12aug/gcnet_r101-d8_512x512_40k_voc12aug_20200613_185806.log.json) | diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet.yml new file mode 100644 index 0000000..1d5eecf --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet.yml @@ -0,0 +1,305 @@ +Collections: +- Name: GCNet + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + Paper: + URL: https://arxiv.org/abs/1904.11492 + Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond' + README: configs/gcnet/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/gc_head.py#L10 + Version: v0.17.0 + Converted From: + Code: https://github.com/xvjiarui/GCNet +Models: +- Name: gcnet_r50-d8_512x1024_40k_cityscapes + In Collection: GCNet + Metadata: + backbone: R-50-D8 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 254.45 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 5.8 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.69 + mIoU(ms+flip): 78.56 + Config: configs/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes/gcnet_r50-d8_512x1024_40k_cityscapes_20200618_074436-4b0fd17b.pth +- Name: gcnet_r101-d8_512x1024_40k_cityscapes + In Collection: GCNet + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 383.14 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 9.2 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.28 + mIoU(ms+flip): 79.34 + Config: configs/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes/gcnet_r101-d8_512x1024_40k_cityscapes_20200618_074436-5e62567f.pth +- Name: gcnet_r50-d8_769x769_40k_cityscapes + In Collection: GCNet + Metadata: + backbone: R-50-D8 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - value: 598.8 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 6.5 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.12 + mIoU(ms+flip): 80.09 + Config: configs/gcnet/gcnet_r50-d8_769x769_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_40k_cityscapes/gcnet_r50-d8_769x769_40k_cityscapes_20200618_182814-a26f4471.pth +- Name: gcnet_r101-d8_769x769_40k_cityscapes + In Collection: GCNet + Metadata: + backbone: R-101-D8 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - value: 884.96 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 10.5 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.95 + mIoU(ms+flip): 80.71 + Config: configs/gcnet/gcnet_r101-d8_769x769_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_40k_cityscapes/gcnet_r101-d8_769x769_40k_cityscapes_20200619_092550-ca4f0a84.pth 
+- Name: gcnet_r50-d8_512x1024_80k_cityscapes + In Collection: GCNet + Metadata: + backbone: R-50-D8 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.48 + mIoU(ms+flip): 80.01 + Config: configs/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes/gcnet_r50-d8_512x1024_80k_cityscapes_20200618_074450-ef8f069b.pth +- Name: gcnet_r101-d8_512x1024_80k_cityscapes + In Collection: GCNet + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.03 + mIoU(ms+flip): 79.84 + Config: configs/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes/gcnet_r101-d8_512x1024_80k_cityscapes_20200618_074450-778ebf69.pth +- Name: gcnet_r50-d8_769x769_80k_cityscapes + In Collection: GCNet + Metadata: + backbone: R-50-D8 + crop size: (769,769) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.68 + mIoU(ms+flip): 80.66 + Config: configs/gcnet/gcnet_r50-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_769x769_80k_cityscapes/gcnet_r50-d8_769x769_80k_cityscapes_20200619_092516-4839565b.pth +- Name: gcnet_r101-d8_769x769_80k_cityscapes + In Collection: GCNet + Metadata: + backbone: R-101-D8 + crop size: (769,769) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.18 + mIoU(ms+flip): 80.71 + Config: configs/gcnet/gcnet_r101-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_769x769_80k_cityscapes/gcnet_r101-d8_769x769_80k_cityscapes_20200619_092628-8e043423.pth +- Name: gcnet_r50-d8_512x512_80k_ade20k + In Collection: GCNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 42.77 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 8.5 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.47 + mIoU(ms+flip): 42.85 + Config: configs/gcnet/gcnet_r50-d8_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_80k_ade20k/gcnet_r50-d8_512x512_80k_ade20k_20200614_185146-91a6da41.pth +- Name: gcnet_r101-d8_512x512_80k_ade20k + In Collection: GCNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 65.79 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 12.0 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.82 + mIoU(ms+flip): 44.54 + Config: configs/gcnet/gcnet_r101-d8_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_80k_ade20k/gcnet_r101-d8_512x512_80k_ade20k_20200615_020811-c3fcb6dd.pth +- Name: gcnet_r50-d8_512x512_160k_ade20k + In Collection: GCNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.37 + mIoU(ms+flip): 43.52 + Config: configs/gcnet/gcnet_r50-d8_512x512_160k_ade20k.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_160k_ade20k/gcnet_r50-d8_512x512_160k_ade20k_20200615_224122-d95f3e1f.pth +- Name: gcnet_r101-d8_512x512_160k_ade20k + In Collection: GCNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.69 + mIoU(ms+flip): 45.21 + Config: configs/gcnet/gcnet_r101-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_160k_ade20k/gcnet_r101-d8_512x512_160k_ade20k_20200615_225406-615528d7.pth +- Name: gcnet_r50-d8_512x512_20k_voc12aug + In Collection: GCNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 42.83 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 5.8 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.42 + mIoU(ms+flip): 77.51 + Config: configs/gcnet/gcnet_r50-d8_512x512_20k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_20k_voc12aug/gcnet_r50-d8_512x512_20k_voc12aug_20200617_165701-3cbfdab1.pth +- Name: gcnet_r101-d8_512x512_20k_voc12aug + In Collection: GCNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 67.57 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.2 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.41 + mIoU(ms+flip): 78.56 + Config: configs/gcnet/gcnet_r101-d8_512x512_20k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_20k_voc12aug/gcnet_r101-d8_512x512_20k_voc12aug_20200617_165713-6c720aa9.pth +- Name: gcnet_r50-d8_512x512_40k_voc12aug + In Collection: GCNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.24 + mIoU(ms+flip): 77.63 + Config: configs/gcnet/gcnet_r50-d8_512x512_40k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r50-d8_512x512_40k_voc12aug/gcnet_r50-d8_512x512_40k_voc12aug_20200613_195105-9797336d.pth +- Name: gcnet_r101-d8_512x512_40k_voc12aug + In Collection: GCNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.84 + mIoU(ms+flip): 78.59 + Config: configs/gcnet/gcnet_r101-d8_512x512_40k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/gcnet/gcnet_r101-d8_512x512_40k_voc12aug/gcnet_r101-d8_512x512_40k_voc12aug_20200613_185806-1e38208d.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..27bd942 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git 
a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..7f0f83f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r101-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..9888120 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r101-d8_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000..1b70ca8 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r101-d8_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000..b17c7a1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r101-d8_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..a2183fc --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r101-d8_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000..08a6031 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './gcnet_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r101-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..5efb613 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = 
'./gcnet_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..610467c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..155e28f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r50-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..1549a4d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r50-d8_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000..a496204 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r50-d8_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000..d85cf65 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r50-d8_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..89d5e1a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ 
+_base_ = [ + '../_base_/models/gcnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r50-d8_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000..332495d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r50-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..d6d9cb1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/gcnet/gcnet_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/gcnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/README.md new file mode 100644 index 0000000..9ebbf4d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/README.md @@ -0,0 +1,122 @@ +# HRNet + +[Deep High-Resolution Representation Learning for Human Pose Estimation](https://arxiv.org/abs/1908.07919) + +## Introduction + +Official Repo + +Code Snippet + +## Abstract + +High-resolution representations are essential for position-sensitive vision problems, such as human pose estimation, semantic segmentation, and object detection. Existing state-of-the-art frameworks first encode the input image as a low-resolution representation through a subnetwork that is formed by connecting high-to-low resolution convolutions *in series* (e.g., ResNet, VGGNet), and then recover the high-resolution representation from the encoded low-resolution representation. Instead, our proposed network, named as High-Resolution Network (HRNet), maintains high-resolution representations through the whole process. There are two key characteristics: (i) Connect the high-to-low resolution convolution streams *in parallel*; (ii) Repeatedly exchange the information across resolutions. The benefit is that the resulting representation is semantically richer and spatially more precise. We show the superiority of the proposed HRNet in a wide range of applications, including human pose estimation, semantic segmentation, and object detection, suggesting that the HRNet is a stronger backbone for computer vision problems. All the codes are available at [this https URL](https://github.com/HRNet). +
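The two ideas the abstract names, parallel high-to-low resolution streams and repeated information exchange across them, can be illustrated on a toy two-stream module. This is a hedged sketch, not the mmseg `HRNet` backbone: the module name is invented, and the 18/36 channel widths merely echo the HRNetV2p-W18 naming in the tables below.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F


class TwoStreamExchange(nn.Module):
    """Toy two-resolution block: parallel convs plus cross-resolution fusion."""

    def __init__(self, c_hi: int = 18, c_lo: int = 36):
        super().__init__()
        self.hi = nn.Conv2d(c_hi, c_hi, 3, padding=1)   # high-resolution stream
        self.lo = nn.Conv2d(c_lo, c_lo, 3, padding=1)   # low-resolution stream
        self.hi_to_lo = nn.Conv2d(c_hi, c_lo, 3, stride=2, padding=1)  # downsample
        self.lo_to_hi = nn.Conv2d(c_lo, c_hi, 1)        # match channels before upsampling

    def forward(self, x_hi: torch.Tensor, x_lo: torch.Tensor):
        y_hi, y_lo = self.hi(x_hi), self.lo(x_lo)
        # exchange: upsample low->high, stride-2 conv high->low, then add
        up = F.interpolate(self.lo_to_hi(y_lo), size=y_hi.shape[-2:],
                           mode='bilinear', align_corners=False)
        return y_hi + up, y_lo + self.hi_to_lo(y_hi)


x_hi, x_lo = torch.randn(1, 18, 64, 64), torch.randn(1, 36, 32, 32)
out_hi, out_lo = TwoStreamExchange()(x_hi, x_lo)
print(out_hi.shape, out_lo.shape)  # (1, 18, 64, 64) (1, 36, 32, 32)
```

Keeping a full-resolution stream alive end to end, instead of recovering it from a low-resolution encoding, is what the abstract credits for the spatially more precise representations.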
+<!-- [IMAGE] -->
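Most config files in this diff are short `_base_` children: they point at one or more base files and override a handful of keys (the `gcnet_r101-d8_*` two-liners earlier, or the HRNet variants below). A hedged sketch of how those overrides resolve, assuming the mmcv 1.x `Config` loader these v0.x-era configs were written for and a working directory at the mmsegmentation root:

```python
from mmcv import Config

# fromfile() recursively merges every file listed in _base_, then applies
# the child config's own dict overrides (e.g. backbone=dict(depth=101)).
cfg = Config.fromfile('configs/hrnet/fcn_hr18_512x1024_40k_cityscapes.py')
print(cfg.model.decode_head.num_classes)  # inherited from the _base_ model file
```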
+ +## Citation + +```bibtex +@inproceedings{SunXLW19, + title={Deep High-Resolution Representation Learning for Human Pose Estimation}, + author={Ke Sun and Bin Xiao and Dong Liu and Jingdong Wang}, + booktitle={CVPR}, + year={2019} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | HRNetV2p-W18-Small | 512x1024 | 40000 | 1.7 | 23.74 | 73.86 | 75.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18s_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_40k_cityscapes/fcn_hr18s_512x1024_40k_cityscapes_20200601_014216-93db27d0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_40k_cityscapes/fcn_hr18s_512x1024_40k_cityscapes_20200601_014216.log.json) | +| FCN | HRNetV2p-W18 | 512x1024 | 40000 | 2.9 | 12.97 | 77.19 | 78.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_40k_cityscapes/fcn_hr18_512x1024_40k_cityscapes_20200601_014216-f196fb4e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_40k_cityscapes/fcn_hr18_512x1024_40k_cityscapes_20200601_014216.log.json) | +| FCN | HRNetV2p-W48 | 512x1024 | 40000 | 6.2 | 6.42 | 78.48 | 79.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr48_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_40k_cityscapes/fcn_hr48_512x1024_40k_cityscapes_20200601_014240-a989b146.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_40k_cityscapes/fcn_hr48_512x1024_40k_cityscapes_20200601_014240.log.json) | +| FCN | HRNetV2p-W18-Small | 512x1024 | 80000 | - | - | 75.31 | 77.48 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18s_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_80k_cityscapes/fcn_hr18s_512x1024_80k_cityscapes_20200601_202700-1462b75d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_80k_cityscapes/fcn_hr18s_512x1024_80k_cityscapes_20200601_202700.log.json) | +| FCN | HRNetV2p-W18 | 512x1024 | 80000 | - | - | 78.65 | 80.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_80k_cityscapes/fcn_hr18_512x1024_80k_cityscapes_20200601_223255-4e7b345e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_80k_cityscapes/fcn_hr18_512x1024_80k_cityscapes_20200601_223255.log.json) | +| FCN | 
HRNetV2p-W48 | 512x1024 | 80000 | - | - | 79.93 | 80.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr48_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_80k_cityscapes/fcn_hr48_512x1024_80k_cityscapes_20200601_202606-58ea95d6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_80k_cityscapes/fcn_hr48_512x1024_80k_cityscapes_20200601_202606.log.json) | +| FCN | HRNetV2p-W18-Small | 512x1024 | 160000 | - | - | 76.31 | 78.31 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18s_512x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_160k_cityscapes/fcn_hr18s_512x1024_160k_cityscapes_20200602_190901-4a0797ea.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_160k_cityscapes/fcn_hr18s_512x1024_160k_cityscapes_20200602_190901.log.json) | +| FCN | HRNetV2p-W18 | 512x1024 | 160000 | - | - | 78.80 | 80.74 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18_512x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_160k_cityscapes/fcn_hr18_512x1024_160k_cityscapes_20200602_190822-221e4a4f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_160k_cityscapes/fcn_hr18_512x1024_160k_cityscapes_20200602_190822.log.json) | +| FCN | HRNetV2p-W48 | 512x1024 | 160000 | - | - | 80.65 | 81.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr48_512x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_160k_cityscapes/fcn_hr48_512x1024_160k_cityscapes_20200602_190946-59b7973e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_160k_cityscapes/fcn_hr48_512x1024_160k_cityscapes_20200602_190946.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FCN | HRNetV2p-W18-Small | 512x512 | 80000 | 3.8 | 38.66 | 31.38 | 32.45 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18s_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_ade20k/fcn_hr18s_512x512_80k_ade20k_20200614_144345-77fc814a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_ade20k/fcn_hr18s_512x512_80k_ade20k_20200614_144345.log.json) | +| FCN | HRNetV2p-W18 | 512x512 | 80000 | 4.9 | 22.57 | 36.27 | 37.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_ade20k/fcn_hr18_512x512_80k_ade20k_20210827_114910-6c9382c0.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_ade20k/fcn_hr18_512x512_80k_ade20k_20210827_114910.log.json) | +| FCN | HRNetV2p-W48 | 512x512 | 80000 | 8.2 | 21.23 | 41.90 | 43.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr48_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_ade20k/fcn_hr48_512x512_80k_ade20k_20200614_193946-7ba5258d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_ade20k/fcn_hr48_512x512_80k_ade20k_20200614_193946.log.json) | +| FCN | HRNetV2p-W18-Small | 512x512 | 160000 | - | - | 33.07 | 34.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18s_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_160k_ade20k/fcn_hr18s_512x512_160k_ade20k_20210829_174739-f1e7c2e7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_160k_ade20k/fcn_hr18s_512x512_160k_ade20k_20210829_174739.log.json) | +| FCN | HRNetV2p-W18 | 512x512 | 160000 | - | - | 36.79 | 38.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_160k_ade20k/fcn_hr18_512x512_160k_ade20k_20200614_214426-ca961836.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_160k_ade20k/fcn_hr18_512x512_160k_ade20k_20200614_214426.log.json) | +| FCN | HRNetV2p-W48 | 512x512 | 160000 | - | - | 42.02 | 43.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr48_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_160k_ade20k/fcn_hr48_512x512_160k_ade20k_20200614_214407-a52fc02c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_160k_ade20k/fcn_hr48_512x512_160k_ade20k_20200614_214407.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | HRNetV2p-W18-Small | 512x512 | 20000 | 1.8 | 43.36 | 65.5 | 68.89 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18s_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_20k_voc12aug/fcn_hr18s_512x512_20k_voc12aug_20210829_174910-0aceadb4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_20k_voc12aug/fcn_hr18s_512x512_20k_voc12aug_20210829_174910.log.json) | +| FCN | HRNetV2p-W18 | 512x512 | 20000 | 2.9 | 23.48 | 72.30 | 74.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18_512x512_20k_voc12aug.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_20k_voc12aug/fcn_hr18_512x512_20k_voc12aug_20200617_224503-488d45f7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_20k_voc12aug/fcn_hr18_512x512_20k_voc12aug_20200617_224503.log.json) | +| FCN | HRNetV2p-W48 | 512x512 | 20000 | 6.2 | 22.05 | 75.87 | 78.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr48_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_20k_voc12aug/fcn_hr48_512x512_20k_voc12aug_20200617_224419-89de05cd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_20k_voc12aug/fcn_hr48_512x512_20k_voc12aug_20200617_224419.log.json) | +| FCN | HRNetV2p-W18-Small | 512x512 | 40000 | - | - | 66.61 | 70.00 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18s_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_40k_voc12aug/fcn_hr18s_512x512_40k_voc12aug_20200614_000648-4f8d6e7f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_40k_voc12aug/fcn_hr18s_512x512_40k_voc12aug_20200614_000648.log.json) | +| FCN | HRNetV2p-W18 | 512x512 | 40000 | - | - | 72.90 | 75.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_40k_voc12aug/fcn_hr18_512x512_40k_voc12aug_20200613_224401-1b4b76cd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_40k_voc12aug/fcn_hr18_512x512_40k_voc12aug_20200613_224401.log.json) | +| FCN | HRNetV2p-W48 | 512x512 | 40000 | - | - | 76.24 | 78.49 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr48_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_40k_voc12aug/fcn_hr48_512x512_40k_voc12aug_20200613_222111-1b0f18bc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_40k_voc12aug/fcn_hr48_512x512_40k_voc12aug_20200613_222111.log.json) | + +### Pascal Context + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FCN | HRNetV2p-W48 | 480x480 | 40000 | 6.1 | 8.86 | 45.14 | 47.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr48_480x480_40k_pascal_context.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context/fcn_hr48_480x480_40k_pascal_context_20200911_164852-667d00b0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context/fcn_hr48_480x480_40k_pascal_context-20200911_164852.log.json) | +| FCN | HRNetV2p-W48 | 480x480 | 80000 | 
- | - | 45.84 | 47.84 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr48_480x480_80k_pascal_context.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context/fcn_hr48_480x480_80k_pascal_context_20200911_155322-847a6711.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context/fcn_hr48_480x480_80k_pascal_context-20200911_155322.log.json) | + +### Pascal Context 59 + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FCN | HRNetV2p-W48 | 480x480 | 40000 | - | - | 50.33 | 52.83 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr48_480x480_40k_pascal_context_59.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context_59/fcn_hr48_480x480_40k_pascal_context_59_20210410_122738-b808b8b2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context_59/fcn_hr48_480x480_40k_pascal_context_59-20210410_122738.log.json) | +| FCN | HRNetV2p-W48 | 480x480 | 80000 | - | - | 51.12 | 53.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr48_480x480_80k_pascal_context_59.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context_59/fcn_hr48_480x480_80k_pascal_context_59_20210411_003240-3ae7081e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context_59/fcn_hr48_480x480_80k_pascal_context_59-20210411_003240.log.json) | + +### LoveDA + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | HRNetV2p-W18-Small | 512x512 | 80000 | 1.59 | 24.87 | 49.28 | 49.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18s_512x512_80k_loveda.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_loveda/fcn_hr18s_512x512_80k_loveda_20211210_203228-60a86a7a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_loveda/fcn_hr18s_512x512_80k_loveda_20211210_203228.log.json) | +| FCN | HRNetV2p-W18 | 512x512 | 80000 | 2.76 | 12.92 | 50.81 | 50.95 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18_512x512_80k_loveda.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_loveda/fcn_hr18_512x512_80k_loveda_20211210_203952-93d9c3b3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_loveda/fcn_hr18_512x512_80k_loveda_20211210_203952.log.json) | +| FCN | HRNetV2p-W48 | 512x512 | 80000 | 6.20 | 9.61 | 51.42 | 51.64 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr48_512x512_80k_loveda.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_loveda/fcn_hr48_512x512_80k_loveda_20211211_044756-67072f55.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_loveda/fcn_hr48_512x512_80k_loveda_20211211_044756.log.json) | + +### Potsdam + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FCN | HRNetV2p-W18-Small | 512x512 | 80000 | 1.58 | 36.00 | 77.64 | 78.8 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18s_512x512_80k_potsdam.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_potsdam/fcn_hr18s_512x512_80k_potsdam_20211218_205517-ba32af63.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_potsdam/fcn_hr18s_512x512_80k_potsdam_20211218_205517.log.json) | +| FCN | HRNetV2p-W18 | 512x512 | 80000 | 2.76 | 19.25 | 78.26 | 79.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18_512x512_80k_potsdam.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_potsdam/fcn_hr18_512x512_80k_potsdam_20211218_205517-5d0387ad.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_potsdam/fcn_hr18_512x512_80k_potsdam_20211218_205517.log.json) | +| FCN | HRNetV2p-W48 | 512x512 | 80000 | 6.20 | 16.42 | 78.39 | 79.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr48_512x512_80k_potsdam.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_potsdam/fcn_hr48_512x512_80k_potsdam_20211219_020601-97434c78.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_potsdam/fcn_hr48_512x512_80k_potsdam_20211219_020601.log.json) | + +### Vaihingen + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------- | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FCN | HRNetV2p-W18-Small | 512x512 | 80000 | 1.58 | 38.11 | 71.81 | 73.1 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18s_4x4_512x512_80k_vaihingen.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_512x512_80k_vaihingen/fcn_hr18s_4x4_512x512_80k_vaihingen_20211231_230909-b23aae02.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_512x512_80k_vaihingen/fcn_hr18s_4x4_512x512_80k_vaihingen_20211231_230909.log.json) | +| FCN | HRNetV2p-W18 | 512x512 | 80000 | 2.76 | 19.55 | 72.57 | 74.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18_4x4_512x512_80k_vaihingen.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_512x512_80k_vaihingen/fcn_hr18_4x4_512x512_80k_vaihingen_20211231_231216-2ec3ae8a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_512x512_80k_vaihingen/fcn_hr18_4x4_512x512_80k_vaihingen_20211231_231216.log.json) | +| FCN | HRNetV2p-W48 | 512x512 | 80000 | 6.20 | 17.25 | 72.50 | 73.52 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr48_4x4_512x512_80k_vaihingen.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_512x512_80k_vaihingen/fcn_hr48_4x4_512x512_80k_vaihingen_20211231_231244-7133cb22.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_512x512_80k_vaihingen/fcn_hr48_4x4_512x512_80k_vaihingen_20211231_231244.log.json) | + +### iSAID + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | HRNetV2p-W18-Small | 896x896 | 80000 | 4.95 | 13.84 | 62.30 | 62.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18s_4x4_896x896_80k_isaid.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_896x896_80k_isaid/fcn_hr18s_4x4_896x896_80k_isaid_20220118_001603-3cc0769b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_896x896_80k_isaid/fcn_hr18s_4x4_896x896_80k_isaid_20220118_001603.log.json) | +| FCN | HRNetV2p-W18 | 896x896 | 80000 | 8.30 | 7.71 | 65.06 | 65.60 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr18_4x4_896x896_80k_isaid.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_896x896_80k_isaid/fcn_hr18_4x4_896x896_80k_isaid_20220110_182230-49bf752e.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_896x896_80k_isaid/fcn_hr18_4x4_896x896_80k_isaid_20220110_182230.log.json) | +| FCN | HRNetV2p-W48 | 896x896 | 80000 | 16.89 | 7.34 | 67.80 | 68.53 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet/fcn_hr48_4x4_896x896_80k_isaid.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_896x896_80k_isaid/fcn_hr48_4x4_896x896_80k_isaid_20220114_174643-547fc420.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_896x896_80k_isaid/fcn_hr48_4x4_896x896_80k_isaid_20220114_174643.log.json) | + +Note: + +- `896x896` is the crop size used for the iSAID dataset, following the implementation of [PointFlow: Flowing Semantics Through Points for Aerial Image Segmentation](https://arxiv.org/pdf/2103.06564.pdf) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_480x480_40k_pascal_context.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_480x480_40k_pascal_context.py new file mode 100644 index 0000000..5ff05aa --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_480x480_40k_pascal_context.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_context.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_480x480_40k_pascal_context_59.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_480x480_40k_pascal_context_59.py new file mode 100644 index 0000000..d2eecf0 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_480x480_40k_pascal_context_59.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_context_59.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=59), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_480x480_80k_pascal_context.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_480x480_80k_pascal_context.py new file mode 100644 index 0000000..cf315a4 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_480x480_80k_pascal_context.py @@ -0,0 +1,8 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_context.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_480x480_80k_pascal_context_59.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_480x480_80k_pascal_context_59.py new file mode 100644 index 0000000..9cbf410 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_480x480_80k_pascal_context_59.py @@ -0,0 +1,8 @@ 
+_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_context_59.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=59), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_4x4_512x512_80k_vaihingen.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_4x4_512x512_80k_vaihingen.py new file mode 100644 index 0000000..3585a7c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_4x4_512x512_80k_vaihingen.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/vaihingen.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict(decode_head=dict(num_classes=6)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_4x4_896x896_80k_isaid.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_4x4_896x896_80k_isaid.py new file mode 100644 index 0000000..62e6d6b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_4x4_896x896_80k_isaid.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/isaid.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict(decode_head=dict(num_classes=16)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_512x1024_160k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_512x1024_160k_cityscapes.py new file mode 100644 index 0000000..9f04e93 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_512x1024_160k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..99760c3 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..a653dda --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_512x512_160k_ade20k.py new file mode 100644 index 0000000..45ed99b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_512x512_160k_ade20k.py @@ -0,0 +1,5 @@ +_base_ = 
[ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict(decode_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_512x512_20k_voc12aug.py new file mode 100644 index 0000000..f06448b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_512x512_20k_voc12aug.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_voc12_aug.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' +] +model = dict(decode_head=dict(num_classes=21)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_512x512_40k_voc12aug.py new file mode 100644 index 0000000..d74e959 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_512x512_40k_voc12aug.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/pascal_voc12_aug.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(decode_head=dict(num_classes=21)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_512x512_80k_ade20k.py new file mode 100644 index 0000000..52bc9f5 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_512x512_80k_ade20k.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict(decode_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_512x512_80k_loveda.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_512x512_80k_loveda.py new file mode 100644 index 0000000..3bc4d0a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_512x512_80k_loveda.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/loveda.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict(decode_head=dict(num_classes=7)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_512x512_80k_potsdam.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_512x512_80k_potsdam.py new file mode 100644 index 0000000..043017f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18_512x512_80k_potsdam.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/fcn_hr18.py', '../_base_/datasets/potsdam.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict(decode_head=dict(num_classes=6)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_480x480_40k_pascal_context.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_480x480_40k_pascal_context.py new file mode 100644 index 0000000..d099310 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_480x480_40k_pascal_context.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_480x480_40k_pascal_context.py' +model = dict( 
+ pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_480x480_40k_pascal_context_59.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_480x480_40k_pascal_context_59.py new file mode 100644 index 0000000..0412c64 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_480x480_40k_pascal_context_59.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_480x480_40k_pascal_context_59.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_480x480_80k_pascal_context.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_480x480_80k_pascal_context.py new file mode 100644 index 0000000..584b713 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_480x480_80k_pascal_context.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_480x480_80k_pascal_context.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_480x480_80k_pascal_context_59.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_480x480_80k_pascal_context_59.py new file mode 100644 index 0000000..babd88d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_480x480_80k_pascal_context_59.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_480x480_80k_pascal_context_59.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_4x4_512x512_80k_vaihingen.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_4x4_512x512_80k_vaihingen.py new file mode 100644 index 0000000..5828fe1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_4x4_512x512_80k_vaihingen.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_4x4_512x512_80k_vaihingen.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_4x4_896x896_80k_isaid.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_4x4_896x896_80k_isaid.py new file mode 100644 index 0000000..d6f6c65 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_4x4_896x896_80k_isaid.py @@ -0,0 
+1,9 @@ +_base_ = './fcn_hr18_4x4_896x896_80k_isaid.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_512x1024_160k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_512x1024_160k_cityscapes.py new file mode 100644 index 0000000..ddbe380 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_512x1024_160k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_512x1024_160k_cityscapes.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..4e31d26 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_512x1024_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_512x1024_40k_cityscapes.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..ee2831d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_512x512_160k_ade20k.py new file mode 100644 index 0000000..22a3ce0 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_512x512_160k_ade20k.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_512x512_160k_ade20k.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_512x512_20k_voc12aug.py new file mode 100644 index 0000000..d0de5df --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_512x512_20k_voc12aug.py @@ -0,0 +1,9 @@ 
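+# HRNetV2p-W18-Small variant: it inherits the W18 voc12aug config unchanged
+# and only slims the backbone (fewer residual blocks per stage and fewer
+# modules in stages 3 and 4), the same override pattern used by every
+# fcn_hr18s_* config in this directory.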
+_base_ = './fcn_hr18_512x512_20k_voc12aug.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_512x512_40k_voc12aug.py new file mode 100644 index 0000000..409db3c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_512x512_40k_voc12aug.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_512x512_40k_voc12aug.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_512x512_80k_ade20k.py new file mode 100644 index 0000000..a840097 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_512x512_80k_ade20k.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_512x512_80k_ade20k.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_512x512_80k_loveda.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_512x512_80k_loveda.py new file mode 100644 index 0000000..b39769f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_512x512_80k_loveda.py @@ -0,0 +1,11 @@ +_base_ = './fcn_hr18_512x512_80k_loveda.py' +model = dict( + backbone=dict( + init_cfg=dict( + type='Pretrained', + checkpoint='open-mmlab://msra/hrnetv2_w18_small'), + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_512x512_80k_potsdam.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_512x512_80k_potsdam.py new file mode 100644 index 0000000..0555127 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr18s_512x512_80k_potsdam.py @@ -0,0 +1,9 @@ +_base_ = './fcn_hr18_512x512_80k_potsdam.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_480x480_40k_pascal_context.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_480x480_40k_pascal_context.py new file mode 100644 index 0000000..0e2d96c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_480x480_40k_pascal_context.py @@ -0,0 +1,10 @@ +_base_ = 
'./fcn_hr18_480x480_40k_pascal_context.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_480x480_40k_pascal_context_59.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_480x480_40k_pascal_context_59.py new file mode 100644 index 0000000..655b460 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_480x480_40k_pascal_context_59.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_480x480_40k_pascal_context_59.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_480x480_80k_pascal_context.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_480x480_80k_pascal_context.py new file mode 100644 index 0000000..e28164e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_480x480_80k_pascal_context.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_480x480_80k_pascal_context.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_480x480_80k_pascal_context_59.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_480x480_80k_pascal_context_59.py new file mode 100644 index 0000000..012ad0a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_480x480_80k_pascal_context_59.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_480x480_80k_pascal_context_59.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_4x4_512x512_80k_vaihingen.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_4x4_512x512_80k_vaihingen.py new file mode 100644 index 0000000..7cb22d8 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_4x4_512x512_80k_vaihingen.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_4x4_512x512_80k_vaihingen.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_4x4_896x896_80k_isaid.py 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_4x4_896x896_80k_isaid.py new file mode 100644 index 0000000..55cf1b5 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_4x4_896x896_80k_isaid.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_4x4_896x896_80k_isaid.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_512x1024_160k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_512x1024_160k_cityscapes.py new file mode 100644 index 0000000..394a61c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_512x1024_160k_cityscapes.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_512x1024_160k_cityscapes.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..d37ab1d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_512x1024_40k_cityscapes.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_512x1024_40k_cityscapes.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..a9bab32 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_512x1024_80k_cityscapes.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_512x512_160k_ade20k.py new file mode 100644 index 0000000..dff4fea --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_512x512_160k_ade20k.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_512x512_160k_ade20k.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + 
in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_512x512_20k_voc12aug.py new file mode 100644 index 0000000..a8d1deb --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_512x512_20k_voc12aug.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_512x512_20k_voc12aug.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_512x512_40k_voc12aug.py new file mode 100644 index 0000000..1084a57 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_512x512_40k_voc12aug.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_512x512_40k_voc12aug.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_512x512_80k_ade20k.py new file mode 100644 index 0000000..7eca7fa --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_512x512_80k_ade20k.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_512x512_80k_ade20k.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_512x512_80k_loveda.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_512x512_80k_loveda.py new file mode 100644 index 0000000..269dbf6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_512x512_80k_loveda.py @@ -0,0 +1,11 @@ +_base_ = './fcn_hr18_512x512_80k_loveda.py' +model = dict( + backbone=dict( + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w48'), + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_512x512_80k_potsdam.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_512x512_80k_potsdam.py new file mode 100644 index 0000000..608fee3 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/fcn_hr48_512x512_80k_potsdam.py @@ -0,0 +1,10 @@ +_base_ = './fcn_hr18_512x512_80k_potsdam.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + 
stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=dict( + in_channels=[48, 96, 192, 384], channels=sum([48, 96, 192, 384]))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/hrnet.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/hrnet.yml new file mode 100644 index 0000000..960a937 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/hrnet/hrnet.yml @@ -0,0 +1,695 @@ +Models: +- Name: fcn_hr18s_512x1024_40k_cityscapes + In Collection: FCN + Metadata: + backbone: HRNetV2p-W18-Small + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 42.12 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 1.7 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.86 + mIoU(ms+flip): 75.91 + Config: configs/hrnet/fcn_hr18s_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_40k_cityscapes/fcn_hr18s_512x1024_40k_cityscapes_20200601_014216-93db27d0.pth +- Name: fcn_hr18_512x1024_40k_cityscapes + In Collection: FCN + Metadata: + backbone: HRNetV2p-W18 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 77.1 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 2.9 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.19 + mIoU(ms+flip): 78.92 + Config: configs/hrnet/fcn_hr18_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_40k_cityscapes/fcn_hr18_512x1024_40k_cityscapes_20200601_014216-f196fb4e.pth +- Name: fcn_hr48_512x1024_40k_cityscapes + In Collection: FCN + Metadata: + backbone: HRNetV2p-W48 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 155.76 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 6.2 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.48 + mIoU(ms+flip): 79.69 + Config: configs/hrnet/fcn_hr48_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_40k_cityscapes/fcn_hr48_512x1024_40k_cityscapes_20200601_014240-a989b146.pth +- Name: fcn_hr18s_512x1024_80k_cityscapes + In Collection: FCN + Metadata: + backbone: HRNetV2p-W18-Small + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.31 + mIoU(ms+flip): 77.48 + Config: configs/hrnet/fcn_hr18s_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_80k_cityscapes/fcn_hr18s_512x1024_80k_cityscapes_20200601_202700-1462b75d.pth +- Name: fcn_hr18_512x1024_80k_cityscapes + In Collection: FCN + Metadata: + backbone: HRNetV2p-W18 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.65 + mIoU(ms+flip): 80.35 + Config: configs/hrnet/fcn_hr18_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_80k_cityscapes/fcn_hr18_512x1024_80k_cityscapes_20200601_223255-4e7b345e.pth +- Name: fcn_hr48_512x1024_80k_cityscapes + In Collection: FCN + Metadata: + backbone: HRNetV2p-W48 + crop size: (512,1024) + lr 
schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.93 + mIoU(ms+flip): 80.72 + Config: configs/hrnet/fcn_hr48_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_80k_cityscapes/fcn_hr48_512x1024_80k_cityscapes_20200601_202606-58ea95d6.pth +- Name: fcn_hr18s_512x1024_160k_cityscapes + In Collection: FCN + Metadata: + backbone: HRNetV2p-W18-Small + crop size: (512,1024) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.31 + mIoU(ms+flip): 78.31 + Config: configs/hrnet/fcn_hr18s_512x1024_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x1024_160k_cityscapes/fcn_hr18s_512x1024_160k_cityscapes_20200602_190901-4a0797ea.pth +- Name: fcn_hr18_512x1024_160k_cityscapes + In Collection: FCN + Metadata: + backbone: HRNetV2p-W18 + crop size: (512,1024) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.8 + mIoU(ms+flip): 80.74 + Config: configs/hrnet/fcn_hr18_512x1024_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x1024_160k_cityscapes/fcn_hr18_512x1024_160k_cityscapes_20200602_190822-221e4a4f.pth +- Name: fcn_hr48_512x1024_160k_cityscapes + In Collection: FCN + Metadata: + backbone: HRNetV2p-W48 + crop size: (512,1024) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.65 + mIoU(ms+flip): 81.92 + Config: configs/hrnet/fcn_hr48_512x1024_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x1024_160k_cityscapes/fcn_hr48_512x1024_160k_cityscapes_20200602_190946-59b7973e.pth +- Name: fcn_hr18s_512x512_80k_ade20k + In Collection: FCN + Metadata: + backbone: HRNetV2p-W18-Small + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 25.87 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 3.8 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 31.38 + mIoU(ms+flip): 32.45 + Config: configs/hrnet/fcn_hr18s_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_ade20k/fcn_hr18s_512x512_80k_ade20k_20200614_144345-77fc814a.pth +- Name: fcn_hr18_512x512_80k_ade20k + In Collection: FCN + Metadata: + backbone: HRNetV2p-W18 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 44.31 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 4.9 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 36.27 + mIoU(ms+flip): 37.28 + Config: configs/hrnet/fcn_hr18_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_ade20k/fcn_hr18_512x512_80k_ade20k_20210827_114910-6c9382c0.pth +- Name: fcn_hr48_512x512_80k_ade20k + In Collection: FCN + Metadata: + backbone: HRNetV2p-W48 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 47.1 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 8.2 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.9 + mIoU(ms+flip): 43.27 + Config: configs/hrnet/fcn_hr48_512x512_80k_ade20k.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_ade20k/fcn_hr48_512x512_80k_ade20k_20200614_193946-7ba5258d.pth +- Name: fcn_hr18s_512x512_160k_ade20k + In Collection: FCN + Metadata: + backbone: HRNetV2p-W18-Small + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 33.07 + mIoU(ms+flip): 34.56 + Config: configs/hrnet/fcn_hr18s_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_160k_ade20k/fcn_hr18s_512x512_160k_ade20k_20210829_174739-f1e7c2e7.pth +- Name: fcn_hr18_512x512_160k_ade20k + In Collection: FCN + Metadata: + backbone: HRNetV2p-W18 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 36.79 + mIoU(ms+flip): 38.58 + Config: configs/hrnet/fcn_hr18_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_160k_ade20k/fcn_hr18_512x512_160k_ade20k_20200614_214426-ca961836.pth +- Name: fcn_hr48_512x512_160k_ade20k + In Collection: FCN + Metadata: + backbone: HRNetV2p-W48 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.02 + mIoU(ms+flip): 43.86 + Config: configs/hrnet/fcn_hr48_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_160k_ade20k/fcn_hr48_512x512_160k_ade20k_20200614_214407-a52fc02c.pth +- Name: fcn_hr18s_512x512_20k_voc12aug + In Collection: FCN + Metadata: + backbone: HRNetV2p-W18-Small + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 23.06 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 1.8 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 65.5 + mIoU(ms+flip): 68.89 + Config: configs/hrnet/fcn_hr18s_512x512_20k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_20k_voc12aug/fcn_hr18s_512x512_20k_voc12aug_20210829_174910-0aceadb4.pth +- Name: fcn_hr18_512x512_20k_voc12aug + In Collection: FCN + Metadata: + backbone: HRNetV2p-W18 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 42.59 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 2.9 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 72.3 + mIoU(ms+flip): 74.71 + Config: configs/hrnet/fcn_hr18_512x512_20k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_20k_voc12aug/fcn_hr18_512x512_20k_voc12aug_20200617_224503-488d45f7.pth +- Name: fcn_hr48_512x512_20k_voc12aug + In Collection: FCN + Metadata: + backbone: HRNetV2p-W48 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 45.35 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 6.2 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 75.87 + mIoU(ms+flip): 78.58 + Config: configs/hrnet/fcn_hr48_512x512_20k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_20k_voc12aug/fcn_hr48_512x512_20k_voc12aug_20200617_224419-89de05cd.pth +- Name: fcn_hr18s_512x512_40k_voc12aug + In Collection: FCN + Metadata: + backbone: HRNetV2p-W18-Small + crop size: 
(512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 66.61 + mIoU(ms+flip): 70.0 + Config: configs/hrnet/fcn_hr18s_512x512_40k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_40k_voc12aug/fcn_hr18s_512x512_40k_voc12aug_20200614_000648-4f8d6e7f.pth +- Name: fcn_hr18_512x512_40k_voc12aug + In Collection: FCN + Metadata: + backbone: HRNetV2p-W18 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 72.9 + mIoU(ms+flip): 75.59 + Config: configs/hrnet/fcn_hr18_512x512_40k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_40k_voc12aug/fcn_hr18_512x512_40k_voc12aug_20200613_224401-1b4b76cd.pth +- Name: fcn_hr48_512x512_40k_voc12aug + In Collection: FCN + Metadata: + backbone: HRNetV2p-W48 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.24 + mIoU(ms+flip): 78.49 + Config: configs/hrnet/fcn_hr48_512x512_40k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_40k_voc12aug/fcn_hr48_512x512_40k_voc12aug_20200613_222111-1b0f18bc.pth +- Name: fcn_hr48_480x480_40k_pascal_context + In Collection: FCN + Metadata: + backbone: HRNetV2p-W48 + crop size: (480,480) + lr schd: 40000 + inference time (ms/im): + - value: 112.87 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (480,480) + Training Memory (GB): 6.1 + Results: + - Task: Semantic Segmentation + Dataset: Pascal Context + Metrics: + mIoU: 45.14 + mIoU(ms+flip): 47.42 + Config: configs/hrnet/fcn_hr48_480x480_40k_pascal_context.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context/fcn_hr48_480x480_40k_pascal_context_20200911_164852-667d00b0.pth +- Name: fcn_hr48_480x480_80k_pascal_context + In Collection: FCN + Metadata: + backbone: HRNetV2p-W48 + crop size: (480,480) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal Context + Metrics: + mIoU: 45.84 + mIoU(ms+flip): 47.84 + Config: configs/hrnet/fcn_hr48_480x480_80k_pascal_context.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context/fcn_hr48_480x480_80k_pascal_context_20200911_155322-847a6711.pth +- Name: fcn_hr48_480x480_40k_pascal_context_59 + In Collection: FCN + Metadata: + backbone: HRNetV2p-W48 + crop size: (480,480) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal Context 59 + Metrics: + mIoU: 50.33 + mIoU(ms+flip): 52.83 + Config: configs/hrnet/fcn_hr48_480x480_40k_pascal_context_59.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_40k_pascal_context_59/fcn_hr48_480x480_40k_pascal_context_59_20210410_122738-b808b8b2.pth +- Name: fcn_hr48_480x480_80k_pascal_context_59 + In Collection: FCN + Metadata: + backbone: HRNetV2p-W48 + crop size: (480,480) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal Context 59 + Metrics: + mIoU: 51.12 + mIoU(ms+flip): 53.56 + Config: configs/hrnet/fcn_hr48_480x480_80k_pascal_context_59.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_480x480_80k_pascal_context_59/fcn_hr48_480x480_80k_pascal_context_59_20210411_003240-3ae7081e.pth +- Name: fcn_hr18s_512x512_80k_loveda + In Collection: FCN + Metadata: + 
backbone: HRNetV2p-W18-Small + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 40.21 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 1.59 + Results: + - Task: Semantic Segmentation + Dataset: LoveDA + Metrics: + mIoU: 49.28 + mIoU(ms+flip): 49.42 + Config: configs/hrnet/fcn_hr18s_512x512_80k_loveda.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_loveda/fcn_hr18s_512x512_80k_loveda_20211210_203228-60a86a7a.pth +- Name: fcn_hr18_512x512_80k_loveda + In Collection: FCN + Metadata: + backbone: HRNetV2p-W18 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 77.4 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 2.76 + Results: + - Task: Semantic Segmentation + Dataset: LoveDA + Metrics: + mIoU: 50.81 + mIoU(ms+flip): 50.95 + Config: configs/hrnet/fcn_hr18_512x512_80k_loveda.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_loveda/fcn_hr18_512x512_80k_loveda_20211210_203952-93d9c3b3.pth +- Name: fcn_hr48_512x512_80k_loveda + In Collection: FCN + Metadata: + backbone: HRNetV2p-W48 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 104.06 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 6.2 + Results: + - Task: Semantic Segmentation + Dataset: LoveDA + Metrics: + mIoU: 51.42 + mIoU(ms+flip): 51.64 + Config: configs/hrnet/fcn_hr48_512x512_80k_loveda.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_loveda/fcn_hr48_512x512_80k_loveda_20211211_044756-67072f55.pth +- Name: fcn_hr18s_512x512_80k_potsdam + In Collection: FCN + Metadata: + backbone: HRNetV2p-W18-Small + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 27.78 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 1.58 + Results: + - Task: Semantic Segmentation + Dataset: Potsdam + Metrics: + mIoU: 77.64 + mIoU(ms+flip): 78.8 + Config: configs/hrnet/fcn_hr18s_512x512_80k_potsdam.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_512x512_80k_potsdam/fcn_hr18s_512x512_80k_potsdam_20211218_205517-ba32af63.pth +- Name: fcn_hr18_512x512_80k_potsdam + In Collection: FCN + Metadata: + backbone: HRNetV2p-W18 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 51.95 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 2.76 + Results: + - Task: Semantic Segmentation + Dataset: Potsdam + Metrics: + mIoU: 78.26 + mIoU(ms+flip): 79.24 + Config: configs/hrnet/fcn_hr18_512x512_80k_potsdam.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_512x512_80k_potsdam/fcn_hr18_512x512_80k_potsdam_20211218_205517-5d0387ad.pth +- Name: fcn_hr48_512x512_80k_potsdam + In Collection: FCN + Metadata: + backbone: HRNetV2p-W48 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 60.9 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 6.2 + Results: + - Task: Semantic Segmentation + Dataset: Potsdam + Metrics: + mIoU: 78.39 + mIoU(ms+flip): 79.34 + Config: configs/hrnet/fcn_hr48_512x512_80k_potsdam.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_512x512_80k_potsdam/fcn_hr48_512x512_80k_potsdam_20211219_020601-97434c78.pth +- Name: fcn_hr18s_4x4_512x512_80k_vaihingen + In Collection: FCN + Metadata: + backbone: HRNetV2p-W18-Small + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 26.24 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 1.58 + Results: + - Task: Semantic Segmentation + Dataset: Vaihingen + Metrics: + mIoU: 71.81 + mIoU(ms+flip): 73.1 + Config: configs/hrnet/fcn_hr18s_4x4_512x512_80k_vaihingen.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_512x512_80k_vaihingen/fcn_hr18s_4x4_512x512_80k_vaihingen_20211231_230909-b23aae02.pth +- Name: fcn_hr18_4x4_512x512_80k_vaihingen + In Collection: FCN + Metadata: + backbone: HRNetV2p-W18 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 51.15 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 2.76 + Results: + - Task: Semantic Segmentation + Dataset: Vaihingen + Metrics: + mIoU: 72.57 + mIoU(ms+flip): 74.09 + Config: configs/hrnet/fcn_hr18_4x4_512x512_80k_vaihingen.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_512x512_80k_vaihingen/fcn_hr18_4x4_512x512_80k_vaihingen_20211231_231216-2ec3ae8a.pth +- Name: fcn_hr48_4x4_512x512_80k_vaihingen + In Collection: FCN + Metadata: + backbone: HRNetV2p-W48 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 57.97 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 6.2 + Results: + - Task: Semantic Segmentation + Dataset: Vaihingen + Metrics: + mIoU: 72.5 + mIoU(ms+flip): 73.52 + Config: configs/hrnet/fcn_hr48_4x4_512x512_80k_vaihingen.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_512x512_80k_vaihingen/fcn_hr48_4x4_512x512_80k_vaihingen_20211231_231244-7133cb22.pth +- Name: fcn_hr18s_4x4_896x896_80k_isaid + In Collection: FCN + Metadata: + backbone: HRNetV2p-W18-Small + crop size: (896,896) + lr schd: 80000 + inference time (ms/im): + - value: 72.25 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (896,896) + Training Memory (GB): 4.95 + Results: + - Task: Semantic Segmentation + Dataset: iSAID + Metrics: + mIoU: 62.3 + mIoU(ms+flip): 62.97 + Config: configs/hrnet/fcn_hr18s_4x4_896x896_80k_isaid.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18s_4x4_896x896_80k_isaid/fcn_hr18s_4x4_896x896_80k_isaid_20220118_001603-3cc0769b.pth +- Name: fcn_hr18_4x4_896x896_80k_isaid + In Collection: FCN + Metadata: + backbone: HRNetV2p-W18 + crop size: (896,896) + lr schd: 80000 + inference time (ms/im): + - value: 129.7 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (896,896) + Training Memory (GB): 8.3 + Results: + - Task: Semantic Segmentation + Dataset: iSAID + Metrics: + mIoU: 65.06 + mIoU(ms+flip): 65.6 + Config: configs/hrnet/fcn_hr18_4x4_896x896_80k_isaid.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr18_4x4_896x896_80k_isaid/fcn_hr18_4x4_896x896_80k_isaid_20220110_182230-49bf752e.pth +- Name: fcn_hr48_4x4_896x896_80k_isaid + In Collection: FCN + Metadata: + backbone: HRNetV2p-W48 + crop size: (896,896) + lr schd: 80000 + inference time (ms/im): + - value: 136.24 + hardware: V100 + backend: 
PyTorch + batch size: 1 + mode: FP32 + resolution: (896,896) + Training Memory (GB): 16.89 + Results: + - Task: Semantic Segmentation + Dataset: iSAID + Metrics: + mIoU: 67.8 + mIoU(ms+flip): 68.53 + Config: configs/hrnet/fcn_hr48_4x4_896x896_80k_isaid.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/hrnet/fcn_hr48_4x4_896x896_80k_isaid/fcn_hr48_4x4_896x896_80k_isaid_20220114_174643-547fc420.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/README.md new file mode 100644 index 0000000..c011af5 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/README.md @@ -0,0 +1,56 @@ +# ICNet + +[ICNet for Real-time Semantic Segmentation on High-resolution Images](https://arxiv.org/abs/1704.08545) + +## Introduction + + +[Official Repo](https://github.com/hszhao/ICNet) + +[Code Snippet](https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77) + +## Abstract + + + +We focus on the challenging task of real-time semantic segmentation in this paper. It finds many practical applications, yet comes with the fundamental difficulty of reducing a large portion of the computation needed for pixel-wise label inference. We propose an image cascade network (ICNet) that incorporates multi-resolution branches under proper label guidance to address this challenge. We provide an in-depth analysis of our framework and introduce the cascade feature fusion unit to quickly achieve high-quality segmentation. Our system yields real-time inference on a single GPU card with decent-quality results evaluated on challenging datasets like Cityscapes, CamVid and COCO-Stuff. + + +
+ +
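+The key component referenced above is the cascade feature fusion (CFF) unit.
+As a rough, illustrative sketch only (mmseg's actual implementation lives in
+`mmseg/models/necks/ic_neck.py`, and the class and argument names below are
+invented for the sketch), a CFF-style block upsamples the coarse branch,
+projects both branches to a common width, sums them, and attaches an auxiliary
+classifier to the upsampled map for deep supervision:
+
+```python
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class CascadeFeatureFusionSketch(nn.Module):
+    """Fuse a low-resolution feature map into a higher-resolution branch."""
+
+    def __init__(self, low_ch, high_ch, out_ch, num_classes):
+        super().__init__()
+        # Dilated 3x3 conv on the upsampled low-resolution branch.
+        self.conv_low = nn.Sequential(
+            nn.Conv2d(low_ch, out_ch, 3, padding=2, dilation=2, bias=False),
+            nn.BatchNorm2d(out_ch))
+        # 1x1 projection so the high-resolution branch matches out_ch.
+        self.conv_high = nn.Sequential(
+            nn.Conv2d(high_ch, out_ch, 1, bias=False),
+            nn.BatchNorm2d(out_ch))
+        # Auxiliary classifier on the upsampled map (training-time loss).
+        self.aux_head = nn.Conv2d(low_ch, num_classes, 1)
+
+    def forward(self, x_low, x_high):
+        x_low = F.interpolate(
+            x_low, size=x_high.shape[2:], mode='bilinear', align_corners=False)
+        fused = F.relu(self.conv_low(x_low) + self.conv_high(x_high))
+        return fused, self.aux_head(x_low)
+```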
+ +## Citation + +```bibtex +@inproceedings{zhao2018icnet, + title={Icnet for real-time semantic segmentation on high-resolution images}, + author={Zhao, Hengshuang and Qi, Xiaojuan and Shen, Xiaoyong and Shi, Jianping and Jia, Jiaya}, + booktitle={Proceedings of the European conference on computer vision (ECCV)}, + pages={405--420}, + year={2018} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ---------------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| ICNet | R-18-D8 | 832x832 | 80000 | 1.70 | 27.12 | 68.14 | 70.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r18-d8_832x832_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_80k_cityscapes/icnet_r18-d8_832x832_80k_cityscapes_20210925_225521-2e36638d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_80k_cityscapes/icnet_r18-d8_832x832_80k_cityscapes_20210925_225521.log.json) | +| ICNet | R-18-D8 | 832x832 | 160000 | - | - | 71.64 | 74.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r18-d8_832x832_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_160k_cityscapes/icnet_r18-d8_832x832_160k_cityscapes_20210925_230153-2c6eb6e0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_160k_cityscapes/icnet_r18-d8_832x832_160k_cityscapes_20210925_230153.log.json) | +| ICNet (in1k-pre) | R-18-D8 | 832x832 | 80000 | - | - | 72.51 | 74.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes_20210925_230354-1cbe3022.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes_20210925_230354.log.json) | +| ICNet (in1k-pre) | R-18-D8 | 832x832 | 160000 | - | - | 74.43 | 76.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes_20210926_052702-619c8ae1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes_20210926_052702.log.json) | +| ICNet | R-50-D8 | 832x832 | 80000 | 2.53 | 20.08 | 68.91 | 69.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r50-d8_832x832_80k_cityscapes.py) |
[model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_80k_cityscapes/icnet_r50-d8_832x832_80k_cityscapes_20210926_044625-c6407341.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_80k_cityscapes/icnet_r50-d8_832x832_80k_cityscapes_20210926_044625.log.json) | +| ICNet | R-50-D8 | 832x832 | 160000 | - | - | 73.82 | 75.67 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r50-d8_832x832_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_160k_cityscapes/icnet_r50-d8_832x832_160k_cityscapes_20210925_232612-a95f0d4e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_160k_cityscapes/icnet_r50-d8_832x832_160k_cityscapes_20210925_232612.log.json) | +| ICNet (in1k-pre) | R-50-D8 | 832x832 | 80000 | - | - | 74.58 | 76.41 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes_20210926_032943-1743dc7b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes_20210926_032943.log.json) | +| ICNet (in1k-pre) | R-50-D8 | 832x832 | 160000 | - | - | 76.29 | 78.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes_20210926_042715-ce310aea.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes_20210926_042715.log.json) | +| ICNet | R-101-D8 | 832x832 | 80000 | 3.08 | 16.95 | 70.28 | 71.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r101-d8_832x832_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_80k_cityscapes/icnet_r101-d8_832x832_80k_cityscapes_20210926_072447-b52f936e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_80k_cityscapes/icnet_r101-d8_832x832_80k_cityscapes_20210926_072447.log.json) | +| ICNet | R-101-D8 | 832x832 | 160000 | - | - | 73.80 | 76.10 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r101-d8_832x832_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_160k_cityscapes/icnet_r101-d8_832x832_160k_cityscapes_20210926_092350-3a1ebf1a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_160k_cityscapes/icnet_r101-d8_832x832_160k_cityscapes_20210926_092350.log.json) | +| ICNet (in1k-pre) | R-101-D8 | 832x832 | 80000 | - | - | 75.57 | 77.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes_20210926_020414-7ceb12c5.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes_20210926_020414.log.json) | +| ICNet (in1k-pre) | R-101-D8 | 832x832 | 160000 | - | - | 76.15 | 77.98 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes_20210925_232612-9484ae8a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes_20210925_232612.log.json) | + +Note: `in1k-pre` means the backbone is initialized from ImageNet-1k pretrained weights (the `in1k-pre` configs below set `init_cfg` to an `open-mmlab://resnet*_v1c` checkpoint). diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet.yml new file mode 100644 index 0000000..ebaf934 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet.yml @@ -0,0 +1,207 @@ +Collections: +- Name: ICNet + Metadata: + Training Data: + - Cityscapes + Paper: + URL: https://arxiv.org/abs/1704.08545 + Title: ICNet for Real-time Semantic Segmentation on High-resolution Images + README: configs/icnet/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/necks/ic_neck.py#L77 + Version: v0.18.0 + Converted From: + Code: https://github.com/hszhao/ICNet +Models: +- Name: icnet_r18-d8_832x832_80k_cityscapes + In Collection: ICNet + Metadata: + backbone: R-18-D8 + crop size: (832,832) + lr schd: 80000 + inference time (ms/im): + - value: 36.87 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (832,832) + Training Memory (GB): 1.7 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 68.14 + mIoU(ms+flip): 70.16 + Config: configs/icnet/icnet_r18-d8_832x832_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_80k_cityscapes/icnet_r18-d8_832x832_80k_cityscapes_20210925_225521-2e36638d.pth +- Name: icnet_r18-d8_832x832_160k_cityscapes + In Collection: ICNet + Metadata: + backbone: R-18-D8 + crop size: (832,832) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 71.64 + mIoU(ms+flip): 74.18 + Config: configs/icnet/icnet_r18-d8_832x832_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_832x832_160k_cityscapes/icnet_r18-d8_832x832_160k_cityscapes_20210925_230153-2c6eb6e0.pth +- Name: icnet_r18-d8_in1k-pre_832x832_80k_cityscapes + In Collection: ICNet + Metadata: + backbone: R-18-D8 + crop size: (832,832) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 72.51 + mIoU(ms+flip): 74.78 + Config: configs/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes_20210925_230354-1cbe3022.pth +- Name: icnet_r18-d8_in1k-pre_832x832_160k_cityscapes + In Collection: ICNet + Metadata: + backbone: R-18-D8 + crop size: (832,832) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.43 + mIoU(ms+flip): 76.72 + Config: configs/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes.py + 
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes_20210926_052702-619c8ae1.pth +- Name: icnet_r50-d8_832x832_80k_cityscapes + In Collection: ICNet + Metadata: + backbone: R-50-D8 + crop size: (832,832) + lr schd: 80000 + inference time (ms/im): + - value: 49.8 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (832,832) + Training Memory (GB): 2.53 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 68.91 + mIoU(ms+flip): 69.72 + Config: configs/icnet/icnet_r50-d8_832x832_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_80k_cityscapes/icnet_r50-d8_832x832_80k_cityscapes_20210926_044625-c6407341.pth +- Name: icnet_r50-d8_832x832_160k_cityscapes + In Collection: ICNet + Metadata: + backbone: R-50-D8 + crop size: (832,832) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.82 + mIoU(ms+flip): 75.67 + Config: configs/icnet/icnet_r50-d8_832x832_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_832x832_160k_cityscapes/icnet_r50-d8_832x832_160k_cityscapes_20210925_232612-a95f0d4e.pth +- Name: icnet_r50-d8_in1k-pre_832x832_80k_cityscapes + In Collection: ICNet + Metadata: + backbone: R-50-D8 + crop size: (832,832) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.58 + mIoU(ms+flip): 76.41 + Config: configs/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes_20210926_032943-1743dc7b.pth +- Name: icnet_r50-d8_in1k-pre_832x832_160k_cityscapes + In Collection: ICNet + Metadata: + backbone: R-50-D8 + crop size: (832,832) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.29 + mIoU(ms+flip): 78.09 + Config: configs/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes_20210926_042715-ce310aea.pth +- Name: icnet_r101-d8_832x832_80k_cityscapes + In Collection: ICNet + Metadata: + backbone: R-101-D8 + crop size: (832,832) + lr schd: 80000 + inference time (ms/im): + - value: 59.0 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (832,832) + Training Memory (GB): 3.08 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 70.28 + mIoU(ms+flip): 71.95 + Config: configs/icnet/icnet_r101-d8_832x832_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_80k_cityscapes/icnet_r101-d8_832x832_80k_cityscapes_20210926_072447-b52f936e.pth +- Name: icnet_r101-d8_832x832_160k_cityscapes + In Collection: ICNet + Metadata: + backbone: R-101-D8 + crop size: (832,832) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.8 + mIoU(ms+flip): 76.1 + Config: configs/icnet/icnet_r101-d8_832x832_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_832x832_160k_cityscapes/icnet_r101-d8_832x832_160k_cityscapes_20210926_092350-3a1ebf1a.pth +- Name: 
icnet_r101-d8_in1k-pre_832x832_80k_cityscapes + In Collection: ICNet + Metadata: + backbone: R-101-D8 + crop size: (832,832) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.57 + mIoU(ms+flip): 77.86 + Config: configs/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes_20210926_020414-7ceb12c5.pth +- Name: icnet_r101-d8_in1k-pre_832x832_160k_cityscapes + In Collection: ICNet + Metadata: + backbone: R-101-D8 + crop size: (832,832) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.15 + mIoU(ms+flip): 77.98 + Config: configs/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes_20210925_232612-9484ae8a.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r101-d8_832x832_160k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r101-d8_832x832_160k_cityscapes.py new file mode 100644 index 0000000..24cbf53 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r101-d8_832x832_160k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './icnet_r50-d8_832x832_160k_cityscapes.py' +model = dict(backbone=dict(backbone_cfg=dict(depth=101))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r101-d8_832x832_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r101-d8_832x832_80k_cityscapes.py new file mode 100644 index 0000000..f3338b5 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r101-d8_832x832_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './icnet_r50-d8_832x832_80k_cityscapes.py' +model = dict(backbone=dict(backbone_cfg=dict(depth=101))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes.py new file mode 100644 index 0000000..74ac355 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r101-d8_in1k-pre_832x832_160k_cityscapes.py @@ -0,0 +1,7 @@ +_base_ = './icnet_r50-d8_832x832_160k_cityscapes.py' +model = dict( + backbone=dict( + backbone_cfg=dict( + depth=101, + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet101_v1c')))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes.py new file mode 100644 index 0000000..b4ba6d6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r101-d8_in1k-pre_832x832_80k_cityscapes.py @@ -0,0 +1,7 @@ +_base_ = './icnet_r50-d8_832x832_80k_cityscapes.py' +model = dict( + backbone=dict( + backbone_cfg=dict( + depth=101, + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet101_v1c')))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r18-d8_832x832_160k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r18-d8_832x832_160k_cityscapes.py new file 
mode 100644 index 0000000..877b775 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r18-d8_832x832_160k_cityscapes.py @@ -0,0 +1,3 @@ +_base_ = './icnet_r50-d8_832x832_160k_cityscapes.py' +model = dict( + backbone=dict(layer_channels=(128, 512), backbone_cfg=dict(depth=18))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r18-d8_832x832_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r18-d8_832x832_80k_cityscapes.py new file mode 100644 index 0000000..786c7cc --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r18-d8_832x832_80k_cityscapes.py @@ -0,0 +1,3 @@ +_base_ = './icnet_r50-d8_832x832_80k_cityscapes.py' +model = dict( + backbone=dict(layer_channels=(128, 512), backbone_cfg=dict(depth=18))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes.py new file mode 100644 index 0000000..cc47951 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r18-d8_in1k-pre_832x832_160k_cityscapes.py @@ -0,0 +1,8 @@ +_base_ = './icnet_r50-d8_832x832_160k_cityscapes.py' +model = dict( + backbone=dict( + layer_channels=(128, 512), + backbone_cfg=dict( + depth=18, + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet18_v1c')))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes.py new file mode 100644 index 0000000..00b0fe0 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r18-d8_in1k-pre_832x832_80k_cityscapes.py @@ -0,0 +1,8 @@ +_base_ = './icnet_r50-d8_832x832_80k_cityscapes.py' +model = dict( + backbone=dict( + layer_channels=(128, 512), + backbone_cfg=dict( + depth=18, + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet18_v1c')))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r50-d8_832x832_160k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r50-d8_832x832_160k_cityscapes.py new file mode 100644 index 0000000..5b9fd9b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r50-d8_832x832_160k_cityscapes.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/icnet_r50-d8.py', + '../_base_/datasets/cityscapes_832x832.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r50-d8_832x832_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r50-d8_832x832_80k_cityscapes.py new file mode 100644 index 0000000..e0336c9 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r50-d8_832x832_80k_cityscapes.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/icnet_r50-d8.py', + '../_base_/datasets/cityscapes_832x832.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes.py new file mode 100644 index 
0000000..6f7a0a1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r50-d8_in1k-pre_832x832_160k_cityscapes.py @@ -0,0 +1,6 @@ +_base_ = './icnet_r50-d8_832x832_160k_cityscapes.py' +model = dict( + backbone=dict( + backbone_cfg=dict( + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet50_v1c')))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes.py new file mode 100644 index 0000000..57546cd --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/icnet/icnet_r50-d8_in1k-pre_832x832_80k_cityscapes.py @@ -0,0 +1,6 @@ +_base_ = './icnet_r50-d8_832x832_80k_cityscapes.py' +model = dict( + backbone=dict( + backbone_cfg=dict( + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet50_v1c')))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/README.md new file mode 100644 index 0000000..d1c268d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/README.md @@ -0,0 +1,80 @@ +# ISANet + +[Interlaced Sparse Self-Attention for Semantic Segmentation](https://arxiv.org/abs/1907.12273) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +In this paper, we present a so-called interlaced sparse self-attention approach to improve the efficiency of the \\emph{self-attention} mechanism for semantic segmentation. The main idea is that we factorize the dense affinity matrix as the product of two sparse affinity matrices. There are two successive attention modules each estimating a sparse affinity matrix. The first attention module is used to estimate the affinities within a subset of positions that have long spatial interval distances and the second attention module is used to estimate the affinities within a subset of positions that have short spatial interval distances. These two attention modules are designed so that each position is able to receive the information from all the other positions. In contrast to the original self-attention module, our approach decreases the computation and memory complexity substantially especially when processing high-resolution feature maps. We empirically verify the effectiveness of our approach on six challenging semantic segmentation benchmarks. + + + +
+ +
 + +## Citation + +```bibtex +@article{huang2019isa, + title={Interlaced Sparse Self-Attention for Semantic Segmentation}, + author={Huang, Lang and Yuan, Yuhui and Guo, Jianyuan and Zhang, Chao and Chen, Xilin and Wang, Jingdong}, + journal={arXiv preprint arXiv:1907.12273}, + year={2019} +} +``` + +The technical report above is also presented in: + +```bibtex +@article{yuan2021ocnet, + title={OCNet: Object Context for Semantic Segmentation}, + author={Yuan, Yuhui and Huang, Lang and Guo, Jianyuan and Zhang, Chao and Chen, Xilin and Wang, Jingdong}, + journal={International Journal of Computer Vision}, + pages={1--24}, + year={2021}, + publisher={Springer} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------- | -------: | -------------- | ----- | ------------: | --------------------------------------------------------------------------------------------------------------------------: | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| ISANet | R-50-D8 | 512x1024 | 40000 | 5.869 | 2.91 | 78.49 | 79.44 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r50-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_40k_cityscapes/isanet_r50-d8_512x1024_40k_cityscapes_20210901_054739-981bd763.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_40k_cityscapes/isanet_r50-d8_512x1024_40k_cityscapes_20210901_054739.log.json) | +| ISANet | R-50-D8 | 512x1024 | 80000 | 5.869 | 2.91 | 78.68 | 80.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r50-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_80k_cityscapes/isanet_r50-d8_512x1024_80k_cityscapes_20210901_074202-89384497.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_80k_cityscapes/isanet_r50-d8_512x1024_80k_cityscapes_20210901_074202.log.json) | +| ISANet | R-50-D8 | 769x769 | 40000 | 6.759 | 1.54 | 78.70 | 80.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r50-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_40k_cityscapes/isanet_r50-d8_769x769_40k_cityscapes_20210903_050200-4ae7e65b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_40k_cityscapes/isanet_r50-d8_769x769_40k_cityscapes_20210903_050200.log.json) | +| ISANet | R-50-D8 | 769x769 | 80000 | 6.759 | 1.54 | 79.29 | 80.53 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r50-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_80k_cityscapes/isanet_r50-d8_769x769_80k_cityscapes_20210903_101126-99b54519.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_80k_cityscapes/isanet_r50-d8_769x769_80k_cityscapes_20210903_101126.log.json) | 
+| ISANet | R-101-D8 | 512x1024 | 40000 | 9.425 | 2.35 | 79.58 | 81.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r101-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_40k_cityscapes/isanet_r101-d8_512x1024_40k_cityscapes_20210901_145553-293e6bd6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_40k_cityscapes/isanet_r101-d8_512x1024_40k_cityscapes_20210901_145553.log.json) | +| ISANet | R-101-D8 | 512x1024 | 80000 | 9.425 | 2.35 | 80.32 | 81.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_80k_cityscapes/isanet_r101-d8_512x1024_80k_cityscapes_20210901_145243-5b99c9b2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_80k_cityscapes/isanet_r101-d8_512x1024_80k_cityscapes_20210901_145243.log.json) | +| ISANet | R-101-D8 | 769x769 | 40000 | 10.815 | 0.92 | 79.68 | 80.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r101-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_40k_cityscapes/isanet_r101-d8_769x769_40k_cityscapes_20210903_111320-509e7224.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_40k_cityscapes/isanet_r101-d8_769x769_40k_cityscapes_20210903_111320.log.json) | +| ISANet | R-101-D8 | 769x769 | 80000 | 10.815 | 0.92 | 80.61 | 81.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r101-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_80k_cityscapes/isanet_r101-d8_769x769_80k_cityscapes_20210903_111319-24f71dfa.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_80k_cityscapes/isanet_r101-d8_769x769_80k_cityscapes_20210903_111319.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------- | -------: | -------------- | ----- | ------------: | ----------------------------------------------------------------------------------------------------------------------: | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| ISANet | R-50-D8 | 512x512 | 80000 | 9.0 | 22.55 | 41.12 | 42.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r50-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_80k_ade20k/isanet_r50-d8_512x512_80k_ade20k_20210903_124557-6ed83a0c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_80k_ade20k/isanet_r50-d8_512x512_80k_ade20k_20210903_124557.log.json) | +| ISANet | R-50-D8 | 512x512 | 160000 | 9.0 | 22.55 | 42.59 | 43.07 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r50-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_160k_ade20k/isanet_r50-d8_512x512_160k_ade20k_20210903_104850-f752d0a3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_160k_ade20k/isanet_r50-d8_512x512_160k_ade20k_20210903_104850.log.json) | +| ISANet | R-101-D8 | 512x512 | 80000 | 12.562 | 10.56 | 43.51 | 44.38 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r101-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_80k_ade20k/isanet_r101-d8_512x512_80k_ade20k_20210903_162056-68b235c2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_80k_ade20k/isanet_r101-d8_512x512_80k_ade20k_20210903_162056.log.json) | +| ISANet | R-101-D8 | 512x512 | 160000 | 12.562 | 10.56 | 43.80 | 45.4 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_160k_ade20k/isanet_r101-d8_512x512_160k_ade20k_20210903_211431-a7879dcd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_160k_ade20k/isanet_r101-d8_512x512_160k_ade20k_20210903_211431.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------- | -------: | -------------- | ----- | ------------: | -----------------------------------------------------------------------------------------------------------------------: | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| ISANet | R-50-D8 | 512x512 | 20000 | 5.9 | 23.08 | 76.78 | 77.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r50-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_20k_voc12aug/isanet_r50-d8_512x512_20k_voc12aug_20210901_164838-79d59b80.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_20k_voc12aug/isanet_r50-d8_512x512_20k_voc12aug_20210901_164838.log.json) | +| ISANet | R-50-D8 | 512x512 | 40000 | 5.9 | 23.08 | 76.20 | 77.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r50-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_40k_voc12aug/isanet_r50-d8_512x512_40k_voc12aug_20210901_151349-7d08a54e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_40k_voc12aug/isanet_r50-d8_512x512_40k_voc12aug_20210901_151349.log.json) | +| ISANet | R-101-D8 | 512x512 | 20000 | 9.465 | 7.42 | 78.46 | 79.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r101-d8_512x512_20k_voc12aug.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_20k_voc12aug/isanet_r101-d8_512x512_20k_voc12aug_20210901_115805-3ccbf355.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_20k_voc12aug/isanet_r101-d8_512x512_20k_voc12aug_20210901_115805.log.json) | +| ISANet | R-101-D8 | 512x512 | 40000 | 9.465 | 7.42 | 78.12 | 79.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/isanet/isanet_r101-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_40k_voc12aug/isanet_r101-d8_512x512_40k_voc12aug_20210901_145814-bc71233b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_40k_voc12aug/isanet_r101-d8_512x512_40k_voc12aug_20210901_145814.log.json) | diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet.yml new file mode 100644 index 0000000..8c65bcf --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet.yml @@ -0,0 +1,369 @@ +Collections: +- Name: ISANet + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + Paper: + URL: https://arxiv.org/abs/1907.12273 + Title: Interlaced Sparse Self-Attention for Semantic Segmentation + README: configs/isanet/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.18.0/mmseg/models/decode_heads/isa_head.py#L58 + Version: v0.18.0 + Converted From: + Code: https://github.com/openseg-group/openseg.pytorch +Models: +- Name: isanet_r50-d8_512x1024_40k_cityscapes + In Collection: ISANet + Metadata: + backbone: R-50-D8 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 343.64 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 5.869 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.49 + mIoU(ms+flip): 79.44 + Config: configs/isanet/isanet_r50-d8_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_40k_cityscapes/isanet_r50-d8_512x1024_40k_cityscapes_20210901_054739-981bd763.pth +- Name: isanet_r50-d8_512x1024_80k_cityscapes + In Collection: ISANet + Metadata: + backbone: R-50-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 343.64 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 5.869 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.68 + mIoU(ms+flip): 80.25 + Config: configs/isanet/isanet_r50-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x1024_80k_cityscapes/isanet_r50-d8_512x1024_80k_cityscapes_20210901_074202-89384497.pth +- Name: isanet_r50-d8_769x769_40k_cityscapes + In Collection: ISANet + Metadata: + backbone: R-50-D8 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - value: 649.35 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 6.759 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.7 + mIoU(ms+flip): 80.28 + Config: configs/isanet/isanet_r50-d8_769x769_40k_cityscapes.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_40k_cityscapes/isanet_r50-d8_769x769_40k_cityscapes_20210903_050200-4ae7e65b.pth +- Name: isanet_r50-d8_769x769_80k_cityscapes + In Collection: ISANet + Metadata: + backbone: R-50-D8 + crop size: (769,769) + lr schd: 80000 + inference time (ms/im): + - value: 649.35 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 6.759 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.29 + mIoU(ms+flip): 80.53 + Config: configs/isanet/isanet_r50-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_769x769_80k_cityscapes/isanet_r50-d8_769x769_80k_cityscapes_20210903_101126-99b54519.pth +- Name: isanet_r101-d8_512x1024_40k_cityscapes + In Collection: ISANet + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 425.53 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 9.425 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.58 + mIoU(ms+flip): 81.05 + Config: configs/isanet/isanet_r101-d8_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_40k_cityscapes/isanet_r101-d8_512x1024_40k_cityscapes_20210901_145553-293e6bd6.pth +- Name: isanet_r101-d8_512x1024_80k_cityscapes + In Collection: ISANet + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 425.53 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 9.425 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.32 + mIoU(ms+flip): 81.58 + Config: configs/isanet/isanet_r101-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x1024_80k_cityscapes/isanet_r101-d8_512x1024_80k_cityscapes_20210901_145243-5b99c9b2.pth +- Name: isanet_r101-d8_769x769_40k_cityscapes + In Collection: ISANet + Metadata: + backbone: R-101-D8 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - value: 1086.96 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 10.815 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.68 + mIoU(ms+flip): 80.95 + Config: configs/isanet/isanet_r101-d8_769x769_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_40k_cityscapes/isanet_r101-d8_769x769_40k_cityscapes_20210903_111320-509e7224.pth +- Name: isanet_r101-d8_769x769_80k_cityscapes + In Collection: ISANet + Metadata: + backbone: R-101-D8 + crop size: (769,769) + lr schd: 80000 + inference time (ms/im): + - value: 1086.96 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 10.815 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.61 + mIoU(ms+flip): 81.59 + Config: configs/isanet/isanet_r101-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_769x769_80k_cityscapes/isanet_r101-d8_769x769_80k_cityscapes_20210903_111319-24f71dfa.pth +- Name: isanet_r50-d8_512x512_80k_ade20k + In Collection: ISANet + Metadata: + 
backbone: R-50-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 44.35 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.0 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.12 + mIoU(ms+flip): 42.35 + Config: configs/isanet/isanet_r50-d8_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_80k_ade20k/isanet_r50-d8_512x512_80k_ade20k_20210903_124557-6ed83a0c.pth +- Name: isanet_r50-d8_512x512_160k_ade20k + In Collection: ISANet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 44.35 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.0 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.59 + mIoU(ms+flip): 43.07 + Config: configs/isanet/isanet_r50-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_160k_ade20k/isanet_r50-d8_512x512_160k_ade20k_20210903_104850-f752d0a3.pth +- Name: isanet_r101-d8_512x512_80k_ade20k + In Collection: ISANet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 94.7 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 12.562 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.51 + mIoU(ms+flip): 44.38 + Config: configs/isanet/isanet_r101-d8_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_80k_ade20k/isanet_r101-d8_512x512_80k_ade20k_20210903_162056-68b235c2.pth +- Name: isanet_r101-d8_512x512_160k_ade20k + In Collection: ISANet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 94.7 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 12.562 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.8 + mIoU(ms+flip): 45.4 + Config: configs/isanet/isanet_r101-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_160k_ade20k/isanet_r101-d8_512x512_160k_ade20k_20210903_211431-a7879dcd.pth +- Name: isanet_r50-d8_512x512_20k_voc12aug + In Collection: ISANet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 43.33 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 5.9 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.78 + mIoU(ms+flip): 77.79 + Config: configs/isanet/isanet_r50-d8_512x512_20k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_20k_voc12aug/isanet_r50-d8_512x512_20k_voc12aug_20210901_164838-79d59b80.pth +- Name: isanet_r50-d8_512x512_40k_voc12aug + In Collection: ISANet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 40000 + inference time (ms/im): + - value: 43.33 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 5.9 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.2 + mIoU(ms+flip): 77.22 + 
Config: configs/isanet/isanet_r50-d8_512x512_40k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r50-d8_512x512_40k_voc12aug/isanet_r50-d8_512x512_40k_voc12aug_20210901_151349-7d08a54e.pth +- Name: isanet_r101-d8_512x512_20k_voc12aug + In Collection: ISANet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 134.77 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.465 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 78.46 + mIoU(ms+flip): 79.16 + Config: configs/isanet/isanet_r101-d8_512x512_20k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_20k_voc12aug/isanet_r101-d8_512x512_20k_voc12aug_20210901_115805-3ccbf355.pth +- Name: isanet_r101-d8_512x512_40k_voc12aug + In Collection: ISANet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 40000 + inference time (ms/im): + - value: 134.77 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.465 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 78.12 + mIoU(ms+flip): 79.04 + Config: configs/isanet/isanet_r101-d8_512x512_40k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/isanet/isanet_r101-d8_512x512_40k_voc12aug/isanet_r101-d8_512x512_40k_voc12aug_20210901_145814-bc71233b.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r101-d8_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..f5cd8cb --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './isanet_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r101-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..ebc15cb --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './isanet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r101-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..3329010 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './isanet_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r101-d8_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000..46fee91 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r101-d8_512x512_20k_voc12aug.py @@ 
-0,0 +1,2 @@ +_base_ = './isanet_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r101-d8_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000..64bd8c1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './isanet_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r101-d8_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..6e13e20 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './isanet_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r101-d8_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000..cf362aa --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './isanet_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r101-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..3c2283b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './isanet_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r50-d8_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..f8675e9 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/isanet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r50-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..46119fb --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/isanet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r50-d8_512x512_160k_ade20k.py 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..7d5c235 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/isanet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r50-d8_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000..d8b60ba --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/isanet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r50-d8_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000..4729899 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/isanet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r50-d8_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..e35480d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/isanet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r50-d8_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000..201a358 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/isanet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r50-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..5604350 --- /dev/null +++ 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/isanet/isanet_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/isanet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/knet/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/knet/README.md new file mode 100644 index 0000000..a51c5cb --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/knet/README.md @@ -0,0 +1,50 @@ +# K-Net + +[K-Net: Towards Unified Image Segmentation](https://arxiv.org/abs/2106.14855) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Semantic, instance, and panoptic segmentations have been addressed using different and specialized frameworks despite their underlying connections. This paper presents a unified, simple, and effective framework for these essentially similar tasks. The framework, named K-Net, segments both instances and semantic categories consistently by a group of learnable kernels, where each kernel is responsible for generating a mask for either a potential instance or a stuff class. To remedy the difficulties of distinguishing various instances, we propose a kernel update strategy that enables each kernel dynamic and conditional on its meaningful group in the input image. K-Net can be trained in an end-to-end manner with bipartite matching, and its training and inference are naturally NMS-free and box-free. Without bells and whistles, K-Net surpasses all previous published state-of-the-art single-model results of panoptic segmentation on MS COCO test-dev split and semantic segmentation on ADE20K val split with 55.2% PQ and 54.3% mIoU, respectively. Its instance segmentation performance is also on par with Cascade Mask R-CNN on MS COCO with 60%-90% faster inference speeds. Code and models will be released at [this https URL](https://github.com/ZwwWayne/K-Net/). + + + +
+ +
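+## Citation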
+ +```bibtex +@inproceedings{zhang2021knet, + title={{K-Net: Towards} Unified Image Segmentation}, + author={Wenwei Zhang and Jiangmiao Pang and Kai Chen and Chen Change Loy}, + year={2021}, + booktitle={NeurIPS}, +} +``` + +## Results and models + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ---------------- | -------- | --------- | ------- | -------- | -------------- | ----- | ------------- | ---------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| KNet + FCN | R-50-D8 | 512x512 | 80000 | 7.01 | 19.24 | 43.60 | 45.12 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_043751-abcab920.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_043751.log.json) | +| KNet + PSPNet | R-50-D8 | 512x512 | 80000 | 6.98 | 20.04 | 44.18 | 45.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_054634-d2c72240.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_054634.log.json) | +| KNet + DeepLabV3 | R-50-D8 | 512x512 | 80000 | 7.42 | 12.10 | 45.06 | 46.11 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_041642-00c8fbeb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_041642.log.json) | +| KNet + UPerNet | R-50-D8 | 512x512 | 80000 | 7.34 | 17.11 | 43.45 | 44.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k_20220304_125657-215753b0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k_20220304_125657.log.json) | +| KNet + UPerNet | Swin-T | 512x512 | 80000 | 7.57 | 15.56 | 45.84 | 46.27 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k_20220303_133059-7545e1dc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k_20220303_133059.log.json) | +| KNet + UPerNet | Swin-L | 512x512 | 80000 | 13.5 | 8.29 | 52.05 | 53.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/knet/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k_20220303_154559-d8da9a90.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k_20220303_154559.log.json) | +| KNet + UPerNet | Swin-L | 640x640 | 80000 | 13.54 | 8.29 | 52.21 | 53.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/knet/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k_20220301_220747-8787fc71.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k_20220301_220747.log.json) | + +Note: + +- All K-Net experiments were run on 8 V100 (32G) GPUs with 2 samples per GPU (`samples_per_gpu=2` in the configs); a minimal inference sketch follows. 
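+
+For reference, a minimal inference sketch using the mmseg v0.x Python API (`init_segmentor` / `inference_segmentor`), assuming mmsegmentation v0.x is installed. The config path is the one added in this directory; the checkpoint filename matches the download table above and is assumed to have been downloaded locally, and `demo.png` is an illustrative input:
+
+```python
+from mmseg.apis import inference_segmentor, init_segmentor
+
+# Config shipped in this directory; weights downloaded from the table above.
+config = 'configs/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k.py'
+checkpoint = 'knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_043751-abcab920.pth'
+
+# Build the segmentor from the config and load the trained weights.
+model = init_segmentor(config, checkpoint, device='cuda:0')
+
+# Returns a list with one H x W array of per-pixel ADE20K class indices.
+result = inference_segmentor(model, 'demo.png')
+```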
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/knet/knet.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/knet/knet.yml new file mode 100644 index 0000000..5e2e529 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/knet/knet.yml @@ -0,0 +1,169 @@ +Collections: +- Name: KNet + Metadata: + Training Data: + - ADE20K + Paper: + URL: https://arxiv.org/abs/2106.14855 + Title: 'K-Net: Towards Unified Image Segmentation' + README: configs/knet/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.23.0/mmseg/models/decode_heads/knet_head.py#L392 + Version: v0.23.0 + Converted From: + Code: https://github.com/ZwwWayne/K-Net/ +Models: +- Name: knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k + In Collection: KNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 51.98 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 7.01 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.6 + mIoU(ms+flip): 45.12 + Config: configs/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_043751-abcab920.pth +- Name: knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k + In Collection: KNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 49.9 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 6.98 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 44.18 + mIoU(ms+flip): 45.58 + Config: configs/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_054634-d2c72240.pth +- Name: knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k + In Collection: KNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 82.64 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 7.42 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.06 + mIoU(ms+flip): 46.11 + Config: configs/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k_20220228_041642-00c8fbeb.pth +- Name: knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k + In Collection: KNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 58.45 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 7.34 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.45 + mIoU(ms+flip): 44.07 + Config: configs/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k_20220304_125657-215753b0.pth +- Name: 
knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k + In Collection: KNet + Metadata: + backbone: Swin-T + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 64.27 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 7.57 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.84 + mIoU(ms+flip): 46.27 + Config: configs/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k_20220303_133059-7545e1dc.pth +- Name: knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k + In Collection: KNet + Metadata: + backbone: Swin-L + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 120.63 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 13.5 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 52.05 + mIoU(ms+flip): 53.24 + Config: configs/knet/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k_20220303_154559-d8da9a90.pth +- Name: knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k + In Collection: KNet + Metadata: + backbone: Swin-L + crop size: (640,640) + lr schd: 80000 + inference time (ms/im): + - value: 120.63 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (640,640) + Training Memory (GB): 13.54 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 52.21 + mIoU(ms+flip): 53.34 + Config: configs/knet/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/knet/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k_20220301_220747-8787fc71.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k.py new file mode 100644 index 0000000..3edb05c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/knet/knet_s3_deeplabv3_r50-d8_8x2_512x512_adamw_80k_ade20k.py @@ -0,0 +1,93 @@ +_base_ = [ + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +num_stages = 3 +conv_kernel_size = 1 +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='IterativeDecodeHead', + num_stages=num_stages, + kernel_update_head=[ + dict( + type='KernelUpdateHead', + num_classes=150, + num_ffn_fcs=2, + num_heads=8, + num_mask_fcs=1, + feedforward_channels=2048, + in_channels=512, + out_channels=512, + dropout=0.0, + conv_kernel_size=conv_kernel_size, + ffn_act_cfg=dict(type='ReLU', inplace=True), + with_ffn=True, + feat_transform_cfg=dict( + conv_cfg=dict(type='Conv2d'), act_cfg=None), + 
kernel_updator_cfg=dict( + type='KernelUpdator', + in_channels=256, + feat_channels=256, + out_channels=256, + act_cfg=dict(type='ReLU', inplace=True), + norm_cfg=dict(type='LN'))) for _ in range(num_stages) + ], + kernel_generate_head=dict( + type='ASPPHead', + in_channels=2048, + in_index=3, + channels=512, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +# optimizer +optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0005) +optimizer_config = dict(grad_clip=dict(max_norm=1, norm_type=2)) +# learning policy +lr_config = dict( + _delete_=True, + policy='step', + warmup='linear', + warmup_iters=1000, + warmup_ratio=0.001, + step=[60000, 72000], + by_epoch=False) +# In K-Net implementation we use batch size 2 per GPU as default +data = dict(samples_per_gpu=2, workers_per_gpu=2) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k.py new file mode 100644 index 0000000..29a088f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/knet/knet_s3_fcn_r50-d8_8x2_512x512_adamw_80k_ade20k.py @@ -0,0 +1,93 @@ +_base_ = [ + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +num_stages = 3 +conv_kernel_size = 1 +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='IterativeDecodeHead', + num_stages=num_stages, + kernel_update_head=[ + dict( + type='KernelUpdateHead', + num_classes=150, + num_ffn_fcs=2, + num_heads=8, + num_mask_fcs=1, + feedforward_channels=2048, + in_channels=512, + out_channels=512, + dropout=0.0, + conv_kernel_size=conv_kernel_size, + ffn_act_cfg=dict(type='ReLU', inplace=True), + with_ffn=True, + feat_transform_cfg=dict( + conv_cfg=dict(type='Conv2d'), act_cfg=None), + kernel_updator_cfg=dict( + type='KernelUpdator', + in_channels=256, + feat_channels=256, + out_channels=256, + act_cfg=dict(type='ReLU', inplace=True), + norm_cfg=dict(type='LN'))) for _ in range(num_stages) + ], + kernel_generate_head=dict( + type='FCNHead', + in_channels=2048, + in_index=3, + channels=512, + num_convs=2, + concat_input=True, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + 
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) +# optimizer +optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0005) +optimizer_config = dict(grad_clip=dict(max_norm=1, norm_type=2)) +# learning policy +lr_config = dict( + _delete_=True, + policy='step', + warmup='linear', + warmup_iters=1000, + warmup_ratio=0.001, + step=[60000, 72000], + by_epoch=False) +# In K-Net implementation we use batch size 2 per GPU as default +data = dict(samples_per_gpu=2, workers_per_gpu=2) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k.py new file mode 100644 index 0000000..d77a3b4 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/knet/knet_s3_pspnet_r50-d8_8x2_512x512_adamw_80k_ade20k.py @@ -0,0 +1,92 @@ +_base_ = [ + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +num_stages = 3 +conv_kernel_size = 1 +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='IterativeDecodeHead', + num_stages=num_stages, + kernel_update_head=[ + dict( + type='KernelUpdateHead', + num_classes=150, + num_ffn_fcs=2, + num_heads=8, + num_mask_fcs=1, + feedforward_channels=2048, + in_channels=512, + out_channels=512, + dropout=0.0, + conv_kernel_size=conv_kernel_size, + ffn_act_cfg=dict(type='ReLU', inplace=True), + with_ffn=True, + feat_transform_cfg=dict( + conv_cfg=dict(type='Conv2d'), act_cfg=None), + kernel_updator_cfg=dict( + type='KernelUpdator', + in_channels=256, + feat_channels=256, + out_channels=256, + act_cfg=dict(type='ReLU', inplace=True), + norm_cfg=dict(type='LN'))) for _ in range(num_stages) + ], + kernel_generate_head=dict( + type='PSPHead', + in_channels=2048, + in_index=3, + channels=512, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) +# optimizer +optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0005) +optimizer_config = dict(grad_clip=dict(max_norm=1, norm_type=2)) +# learning policy +lr_config = dict( + _delete_=True, + policy='step', + warmup='linear', + warmup_iters=1000, + warmup_ratio=0.001, + step=[60000, 72000], + by_epoch=False) +# In K-Net implementation we use batch size 2 per GPU as default +data = dict(samples_per_gpu=2, workers_per_gpu=2) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k.py 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k.py new file mode 100644 index 0000000..0071cea --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/knet/knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k.py @@ -0,0 +1,93 @@ +_base_ = [ + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +num_stages = 3 +conv_kernel_size = 1 + +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='IterativeDecodeHead', + num_stages=num_stages, + kernel_update_head=[ + dict( + type='KernelUpdateHead', + num_classes=150, + num_ffn_fcs=2, + num_heads=8, + num_mask_fcs=1, + feedforward_channels=2048, + in_channels=512, + out_channels=512, + dropout=0.0, + conv_kernel_size=conv_kernel_size, + ffn_act_cfg=dict(type='ReLU', inplace=True), + with_ffn=True, + feat_transform_cfg=dict( + conv_cfg=dict(type='Conv2d'), act_cfg=None), + kernel_updator_cfg=dict( + type='KernelUpdator', + in_channels=256, + feat_channels=256, + out_channels=256, + act_cfg=dict(type='ReLU', inplace=True), + norm_cfg=dict(type='LN'))) for _ in range(num_stages) + ], + kernel_generate_head=dict( + type='UPerHead', + in_channels=[256, 512, 1024, 2048], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) +# optimizer +optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0005) +optimizer_config = dict(grad_clip=dict(max_norm=1, norm_type=2)) +# learning policy +lr_config = dict( + _delete_=True, + policy='step', + warmup='linear', + warmup_iters=1000, + warmup_ratio=0.001, + step=[60000, 72000], + by_epoch=False) +# In K-Net implementation we use batch size 2 per GPU as default +data = dict(samples_per_gpu=2, workers_per_gpu=2) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/knet/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/knet/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k.py new file mode 100644 index 0000000..b9d1a09 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/knet/knet_s3_upernet_swin-l_8x2_512x512_adamw_80k_ade20k.py @@ -0,0 +1,19 @@ +_base_ = 'knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k.py' + +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_large_patch4_window7_224_22k_20220308-d5bdebaf.pth' # noqa +# model settings +model = dict( + pretrained=checkpoint_file, + backbone=dict( + embed_dims=192, + depths=[2, 2, 18, 2], + num_heads=[6, 12, 24, 48], + window_size=7, + use_abs_pos_embed=False, + 
drop_path_rate=0.3, + patch_norm=True), + decode_head=dict( + kernel_generate_head=dict(in_channels=[192, 384, 768, 1536])), + auxiliary_head=dict(in_channels=768)) +# In K-Net implementation we use batch size 2 per GPU as default +data = dict(samples_per_gpu=2, workers_per_gpu=2) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/knet/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/knet/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k.py new file mode 100644 index 0000000..fc6e9fe --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/knet/knet_s3_upernet_swin-l_8x2_640x640_adamw_80k_ade20k.py @@ -0,0 +1,55 @@ +_base_ = 'knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k.py' + +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_large_patch4_window7_224_22k_20220308-d5bdebaf.pth' # noqa +# model settings +model = dict( + pretrained=checkpoint_file, + backbone=dict( + embed_dims=192, + depths=[2, 2, 18, 2], + num_heads=[6, 12, 24, 48], + window_size=7, + use_abs_pos_embed=False, + drop_path_rate=0.4, + patch_norm=True), + decode_head=dict( + kernel_generate_head=dict(in_channels=[192, 384, 768, 1536])), + auxiliary_head=dict(in_channels=768)) + +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (640, 640) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='Resize', img_scale=(2048, 640), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 640), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +# In K-Net implementation we use batch size 2 per GPU as default. +# Merged into a single dict: a second `data = dict(...)` assignment would +# silently overwrite the 640x640 pipeline overrides above. +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k.py new file mode 100644 index 0000000..0b29b2b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/knet/knet_s3_upernet_swin-t_8x2_512x512_adamw_80k_ade20k.py @@ -0,0 +1,57 @@ +_base_ = 'knet_s3_upernet_r50-d8_8x2_512x512_adamw_80k_ade20k.py' + +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_tiny_patch4_window7_224_20220308-f41b89d3.pth' # noqa + +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +num_stages = 3 +conv_kernel_size = 1 + +model = dict( + type='EncoderDecoder', + pretrained=checkpoint_file, + backbone=dict( + _delete_=True, + type='SwinTransformer', + embed_dims=96, + depths=[2, 2, 6, 2], + num_heads=[3, 6, 12, 24], + 
window_size=7, + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.3, + use_abs_pos_embed=False, + patch_norm=True, + out_indices=(0, 1, 2, 3)), + decode_head=dict( + kernel_generate_head=dict(in_channels=[96, 192, 384, 768])), + auxiliary_head=dict(in_channels=384)) + +# modify learning rate following the official implementation of Swin Transformer # noqa +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.0005, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) + })) +optimizer_config = dict(grad_clip=dict(max_norm=1, norm_type=2)) +# learning policy +lr_config = dict( + _delete_=True, + policy='step', + warmup='linear', + warmup_iters=1000, + warmup_ratio=0.001, + step=[60000, 72000], + by_epoch=False) +# In K-Net implementation we use batch size 2 per GPU as default +data = dict(samples_per_gpu=2, workers_per_gpu=2) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mae/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mae/README.md new file mode 100644 index 0000000..562f6f8 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mae/README.md @@ -0,0 +1,82 @@ +# MAE + +[Masked Autoencoders Are Scalable Vision Learners](https://arxiv.org/abs/2111.06377) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +This paper shows that masked autoencoders (MAE) are scalable self-supervised learners for computer vision. Our MAE approach is simple: we mask random patches of the input image and reconstruct the missing pixels. It is based on two core designs. First, we develop an asymmetric encoder-decoder architecture, with an encoder that operates only on the visible subset of patches (without mask tokens), along with a lightweight decoder that reconstructs the original image from the latent representation and mask tokens. Second, we find that masking a high proportion of the input image, e.g., 75%, yields a nontrivial and meaningful self-supervisory task. Coupling these two designs enables us to train large models efficiently and effectively: we accelerate training (by 3x or more) and improve accuracy. Our scalable approach allows for learning high-capacity models that generalize well: e.g., a vanilla ViT-Huge model achieves the best accuracy (87.8%) among methods that use only ImageNet-1K data. Transfer performance in downstream tasks outperforms supervised pre-training and shows promising scaling behavior. + + + +
+ +
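The core trick the abstract describes — encode only a small visible subset of patches and reconstruct the rest — is easy to illustrate. A toy sketch (illustrative only, not the MMSegmentation implementation) of 75% random patch masking:

```python
# Toy sketch of MAE-style random patch masking (75% masked), as described
# in the abstract above. Illustrative only, not mmseg code.
import torch

def random_masking(patches, mask_ratio=0.75):
    # patches: (batch, num_patches, dim)
    b, n, d = patches.shape
    n_keep = int(n * (1 - mask_ratio))
    ids_shuffle = torch.rand(b, n).argsort(dim=1)   # random patch permutation
    ids_keep = ids_shuffle[:, :n_keep]              # visible subset only
    visible = torch.gather(
        patches, 1, ids_keep.unsqueeze(-1).expand(-1, -1, d))
    return visible, ids_shuffle                     # encoder sees `visible`

x = torch.randn(2, 196, 768)    # e.g. 14x14 patches from a ViT-B
visible, _ = random_masking(x)  # -> (2, 49, 768): encoder input is 4x smaller
```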
+ +## Citation + +```bibtex +@article{he2021masked, + title={Masked autoencoders are scalable vision learners}, + author={He, Kaiming and Chen, Xinlei and Xie, Saining and Li, Yanghao and Doll{\'a}r, Piotr and Girshick, Ross}, + journal={arXiv preprint arXiv:2111.06377}, + year={2021} +} +``` + +## Usage + +To use pre-trained models from other repositories, their checkpoint keys must first be converted to MMSegmentation style. + +We provide a script [`beit2mmseg.py`](../../tools/model_converters/beit2mmseg.py) in the tools directory to convert the keys of an MAE model from [the official repo](https://github.com/facebookresearch/mae) to MMSegmentation style. + +```shell +python tools/model_converters/beit2mmseg.py ${PRETRAIN_PATH} ${STORE_PATH} +``` + +E.g. + +```shell +python tools/model_converters/beit2mmseg.py https://dl.fbaipublicfiles.com/mae/pretrain/mae_pretrain_vit_base.pth pretrain/mae_pretrain_vit_base_mmcls.pth +``` + +This script converts the model from `PRETRAIN_PATH` and stores the converted model in `STORE_PATH`. + +In our default setting, the pretrained models map to the original checkpoints as below: + +| pretrained models | original models | +| ------------------------------- | ------------------------------------------------------------------------------------------------ | +| mae_pretrain_vit_base_mmcls.pth | [mae_pretrain_vit_base](https://dl.fbaipublicfiles.com/mae/pretrain/mae_pretrain_vit_base.pth) | + +Verify the single-scale results of the model: + +```shell +sh tools/dist_test.sh \ +configs/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k.py \ +upernet_mae-base_fp16_8x2_512x512_160k_ade20k_20220426_174752-f92a2975.pth $GPUS --eval mIoU +``` + +Since the relative position embedding requires the input height and width to be equal, sliding-window inference is adopted for multi-scale testing, with `min_size=512` so that the shortest edge is at least 512. Multi-scale inference is therefore run with a separate config instead of `--aug-test`.
For multi-scale inference: + +```shell +sh tools/dist_test.sh \ +configs/mae/upernet_mae-base_fp16_512x512_160k_ade20k_ms.py \ +upernet_mae-base_fp16_8x2_512x512_160k_ade20k_20220426_174752-f92a2975.pth $GPUS --eval mIoU +``` + +## Results and models + +### ADE20K + +| Method | Backbone | Crop Size | pretrain | pretrain img size | Batch Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------- | -------- | --------- | ----------- | ----------------- | ---------- | ------- | -------- | -------------- | ----- | ------------: | ------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| UPerNet | ViT-B | 512x512 | ImageNet-1K | 224x224 | 16 | 160000 | 9.96 | 7.14 | 48.13 | 48.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k/upernet_mae-base_fp16_8x2_512x512_160k_ade20k_20220426_174752-f92a2975.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k/upernet_mae-base_fp16_8x2_512x512_160k_ade20k_20220426_174752.log.json) | diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mae/mae.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mae/mae.yml new file mode 100644 index 0000000..d78f99c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mae/mae.yml @@ -0,0 +1,23 @@ +Models: +- Name: upernet_mae-base_fp16_8x2_512x512_160k_ade20k + In Collection: UPerNet + Metadata: + backbone: ViT-B + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 140.06 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP16 + resolution: (512,512) + Training Memory (GB): 9.96 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 48.13 + mIoU(ms+flip): 48.7 + Config: configs/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k/upernet_mae-base_fp16_8x2_512x512_160k_ade20k_20220426_174752-f92a2975.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mae/upernet_mae-base_fp16_512x512_160k_ade20k_ms.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mae/upernet_mae-base_fp16_512x512_160k_ade20k_ms.py new file mode 100644 index 0000000..85b3be3 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mae/upernet_mae-base_fp16_512x512_160k_ade20k_ms.py @@ -0,0 +1,24 @@ +_base_ = './upernet_mae-base_fp16_8x2_512x512_160k_ade20k.py' + +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 512), + img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=True, + transforms=[ + dict(type='Resize', keep_ratio=True, min_size=512), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + 
dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline), + samples_per_gpu=2) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k.py new file mode 100644 index 0000000..cb236cc --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mae/upernet_mae-base_fp16_8x2_512x512_160k_ade20k.py @@ -0,0 +1,48 @@ +_base_ = [ + '../_base_/models/upernet_mae.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] + +model = dict( + pretrained='./pretrain/mae_pretrain_vit_base_mmcls.pth', + backbone=dict( + type='MAE', + img_size=(512, 512), + patch_size=16, + embed_dims=768, + num_layers=12, + num_heads=12, + mlp_ratio=4, + init_values=1.0, + drop_path_rate=0.1, + out_indices=[3, 5, 7, 11]), + neck=dict(embed_dim=768, rescales=[4, 2, 1, 0.5]), + decode_head=dict( + in_channels=[768, 768, 768, 768], num_classes=150, channels=768), + auxiliary_head=dict(in_channels=768, num_classes=150), + test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(341, 341))) + +optimizer = dict( + _delete_=True, + type='AdamW', + lr=1e-4, + betas=(0.9, 0.999), + weight_decay=0.05, + constructor='LayerDecayOptimizerConstructor', + paramwise_cfg=dict(num_layers=12, layer_decay_rate=0.65)) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# mixed precision +fp16 = dict(loss_scale='dynamic') + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v2/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v2/README.md new file mode 100644 index 0000000..3ea8a46 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v2/README.md @@ -0,0 +1,56 @@ +# MobileNetV2 + +[MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +In this paper we describe a new mobile architecture, MobileNetV2, that improves the state of the art performance of mobile models on multiple tasks and benchmarks as well as across a spectrum of different model sizes. We also describe efficient ways of applying these mobile models to object detection in a novel framework we call SSDLite. Additionally, we demonstrate how to build mobile semantic segmentation models through a reduced form of DeepLabv3 which we call Mobile DeepLabv3. +The MobileNetV2 architecture is based on an inverted residual structure where the input and output of the residual block are thin bottleneck layers opposite to traditional residual models which use expanded representations in the input. MobileNetV2 uses lightweight depthwise convolutions to filter features in the intermediate expansion layer. Additionally, we find that it is important to remove non-linearities in the narrow layers in order to maintain representational power. We demonstrate that this improves performance and provide an intuition that led to this design. 
Finally, our approach allows decoupling of the input/output domains from the expressiveness of the transformation, which provides a convenient framework for further analysis. We measure our performance on ImageNet classification, COCO object detection, and VOC image segmentation. We evaluate the trade-offs between accuracy and number of operations measured by multiply-adds (MAdd), as well as the number of parameters. + + +
+ +
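The inverted residual block the abstract describes is short enough to sketch directly. A toy PyTorch sketch (illustrative only, not the mmseg `MobileNetV2` backbone); the real block additionally adds a skip connection when stride is 1 and the channel counts match:

```python
# Toy sketch of an inverted residual block: thin input/output bottlenecks,
# an expanded intermediate representation filtered by a depthwise conv,
# and a *linear* (no ReLU) projection back to the narrow output.
import torch.nn as nn

def inverted_residual(in_ch, out_ch, expand_ratio=6, stride=1):
    mid = in_ch * expand_ratio
    return nn.Sequential(
        nn.Conv2d(in_ch, mid, 1, bias=False),   # 1x1 expand (pointwise)
        nn.BatchNorm2d(mid),
        nn.ReLU6(inplace=True),
        nn.Conv2d(mid, mid, 3, stride, 1, groups=mid, bias=False),  # depthwise
        nn.BatchNorm2d(mid),
        nn.ReLU6(inplace=True),
        nn.Conv2d(mid, out_ch, 1, bias=False),  # 1x1 project (linear bottleneck)
        nn.BatchNorm2d(out_ch),                 # no activation here on purpose
    )
```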
+ +## Citation + +```bibtex +@inproceedings{sandler2018mobilenetv2, + title={Mobilenetv2: Inverted residuals and linear bottlenecks}, + author={Sandler, Mark and Howard, Andrew and Zhu, Menglong and Zhmoginov, Andrey and Chen, Liang-Chieh}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={4510--4520}, + year={2018} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ---------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FCN | M-V2-D8 | 512x1024 | 80000 | 3.4 | 14.2 | 61.54 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v2/fcn_m-v2-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x1024_80k_cityscapes/fcn_m-v2-d8_512x1024_80k_cityscapes_20200825_124817-d24c28c1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x1024_80k_cityscapes/fcn_m-v2-d8_512x1024_80k_cityscapes-20200825_124817.log.json) | +| PSPNet | M-V2-D8 | 512x1024 | 80000 | 3.6 | 11.2 | 70.23 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes/pspnet_m-v2-d8_512x1024_80k_cityscapes_20200825_124817-19e81d51.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes/pspnet_m-v2-d8_512x1024_80k_cityscapes-20200825_124817.log.json) | +| DeepLabV3 | M-V2-D8 | 512x1024 | 80000 | 3.9 | 8.4 | 73.84 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes/deeplabv3_m-v2-d8_512x1024_80k_cityscapes_20200825_124836-bef03590.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes/deeplabv3_m-v2-d8_512x1024_80k_cityscapes-20200825_124836.log.json) | +| DeepLabV3+ | M-V2-D8 | 512x1024 | 80000 | 5.1 | 8.4 | 75.20 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes_20200825_124836-d256dd4b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes-20200825_124836.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | 
download | +| ---------- | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | M-V2-D8 | 512x512 | 160000 | 6.5 | 64.4 | 19.71 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v2/fcn_m-v2-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x512_160k_ade20k/fcn_m-v2-d8_512x512_160k_ade20k_20200825_214953-c40e1095.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x512_160k_ade20k/fcn_m-v2-d8_512x512_160k_ade20k-20200825_214953.log.json) | +| PSPNet | M-V2-D8 | 512x512 | 160000 | 6.5 | 57.7 | 29.68 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v2/pspnet_m-v2-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x512_160k_ade20k/pspnet_m-v2-d8_512x512_160k_ade20k_20200825_214953-f5942f7a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x512_160k_ade20k/pspnet_m-v2-d8_512x512_160k_ade20k-20200825_214953.log.json) | +| DeepLabV3 | M-V2-D8 | 512x512 | 160000 | 6.8 | 39.9 | 34.08 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v2/deeplabv3_m-v2-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x512_160k_ade20k/deeplabv3_m-v2-d8_512x512_160k_ade20k_20200825_223255-63986343.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x512_160k_ade20k/deeplabv3_m-v2-d8_512x512_160k_ade20k-20200825_223255.log.json) | +| DeepLabV3+ | M-V2-D8 | 512x512 | 160000 | 8.2 | 43.1 | 34.02 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v2/deeplabv3plus_m-v2-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x512_160k_ade20k/deeplabv3plus_m-v2-d8_512x512_160k_ade20k_20200825_223255-465a01d4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x512_160k_ade20k/deeplabv3plus_m-v2-d8_512x512_160k_ade20k-20200825_223255.log.json) | diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..267483d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,12 @@ +_base_ = '../deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6)), + 
decode_head=dict(in_channels=320), + auxiliary_head=dict(in_channels=96)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v2/deeplabv3_m-v2-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v2/deeplabv3_m-v2-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..e15b8cc --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v2/deeplabv3_m-v2-d8_512x512_160k_ade20k.py @@ -0,0 +1,12 @@ +_base_ = '../deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k.py' +model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6)), + decode_head=dict(in_channels=320), + auxiliary_head=dict(in_channels=96)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..d4533d7 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,12 @@ +_base_ = '../deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6)), + decode_head=dict(in_channels=320, c1_in_channels=24), + auxiliary_head=dict(in_channels=96)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v2/deeplabv3plus_m-v2-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v2/deeplabv3plus_m-v2-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..7615a7c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v2/deeplabv3plus_m-v2-d8_512x512_160k_ade20k.py @@ -0,0 +1,12 @@ +_base_ = '../deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k.py' +model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6)), + decode_head=dict(in_channels=320, c1_in_channels=24), + auxiliary_head=dict(in_channels=96)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v2/fcn_m-v2-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v2/fcn_m-v2-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..a535bd0 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v2/fcn_m-v2-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,12 @@ +_base_ = '../fcn/fcn_r101-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6)), + decode_head=dict(in_channels=320), + auxiliary_head=dict(in_channels=96)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v2/fcn_m-v2-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v2/fcn_m-v2-d8_512x512_160k_ade20k.py new file mode 100644 
index 0000000..c5f6ab0 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v2/fcn_m-v2-d8_512x512_160k_ade20k.py @@ -0,0 +1,12 @@ +_base_ = '../fcn/fcn_r101-d8_512x512_160k_ade20k.py' +model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6)), + decode_head=dict(in_channels=320), + auxiliary_head=dict(in_channels=96)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v2/mobilenet_v2.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v2/mobilenet_v2.yml new file mode 100644 index 0000000..5527ba8 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v2/mobilenet_v2.yml @@ -0,0 +1,169 @@ +Models: +- Name: fcn_m-v2-d8_512x1024_80k_cityscapes + In Collection: FCN + Metadata: + backbone: M-V2-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 70.42 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 3.4 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 61.54 + Config: configs/mobilenet_v2/fcn_m-v2-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x1024_80k_cityscapes/fcn_m-v2-d8_512x1024_80k_cityscapes_20200825_124817-d24c28c1.pth +- Name: pspnet_m-v2-d8_512x1024_80k_cityscapes + In Collection: PSPNet + Metadata: + backbone: M-V2-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 89.29 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 3.6 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 70.23 + Config: configs/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes/pspnet_m-v2-d8_512x1024_80k_cityscapes_20200825_124817-19e81d51.pth +- Name: deeplabv3_m-v2-d8_512x1024_80k_cityscapes + In Collection: DeepLabV3 + Metadata: + backbone: M-V2-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 119.05 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 3.9 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.84 + Config: configs/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x1024_80k_cityscapes/deeplabv3_m-v2-d8_512x1024_80k_cityscapes_20200825_124836-bef03590.pth +- Name: deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes + In Collection: DeepLabV3+ + Metadata: + backbone: M-V2-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 119.05 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 5.1 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.2 + Config: configs/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes/deeplabv3plus_m-v2-d8_512x1024_80k_cityscapes_20200825_124836-d256dd4b.pth +- Name: 
fcn_m-v2-d8_512x512_160k_ade20k + In Collection: FCN + Metadata: + backbone: M-V2-D8 + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 15.53 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 6.5 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 19.71 + Config: configs/mobilenet_v2/fcn_m-v2-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/fcn_m-v2-d8_512x512_160k_ade20k/fcn_m-v2-d8_512x512_160k_ade20k_20200825_214953-c40e1095.pth +- Name: pspnet_m-v2-d8_512x512_160k_ade20k + In Collection: PSPNet + Metadata: + backbone: M-V2-D8 + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 17.33 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 6.5 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 29.68 + Config: configs/mobilenet_v2/pspnet_m-v2-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/pspnet_m-v2-d8_512x512_160k_ade20k/pspnet_m-v2-d8_512x512_160k_ade20k_20200825_214953-f5942f7a.pth +- Name: deeplabv3_m-v2-d8_512x512_160k_ade20k + In Collection: DeepLabV3 + Metadata: + backbone: M-V2-D8 + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 25.06 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 6.8 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 34.08 + Config: configs/mobilenet_v2/deeplabv3_m-v2-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3_m-v2-d8_512x512_160k_ade20k/deeplabv3_m-v2-d8_512x512_160k_ade20k_20200825_223255-63986343.pth +- Name: deeplabv3plus_m-v2-d8_512x512_160k_ade20k + In Collection: DeepLabV3+ + Metadata: + backbone: M-V2-D8 + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 23.2 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 8.2 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 34.02 + Config: configs/mobilenet_v2/deeplabv3plus_m-v2-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v2/deeplabv3plus_m-v2-d8_512x512_160k_ade20k/deeplabv3plus_m-v2-d8_512x512_160k_ade20k_20200825_223255-465a01d4.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..7403bee --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,12 @@ +_base_ = '../pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6)), + decode_head=dict(in_channels=320), + auxiliary_head=dict(in_channels=96)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v2/pspnet_m-v2-d8_512x512_160k_ade20k.py 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v2/pspnet_m-v2-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..5b72ac8 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v2/pspnet_m-v2-d8_512x512_160k_ade20k.py @@ -0,0 +1,12 @@ +_base_ = '../pspnet/pspnet_r101-d8_512x512_160k_ade20k.py' +model = dict( + pretrained='mmcls://mobilenet_v2', + backbone=dict( + _delete_=True, + type='MobileNetV2', + widen_factor=1., + strides=(1, 2, 2, 1, 1, 1, 1), + dilations=(1, 1, 1, 2, 2, 4, 4), + out_indices=(1, 2, 4, 6)), + decode_head=dict(in_channels=320), + auxiliary_head=dict(in_channels=96)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v3/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v3/README.md new file mode 100644 index 0000000..66f2068 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v3/README.md @@ -0,0 +1,50 @@ +# MobileNetV3 + +[Searching for MobileNetV3](https://arxiv.org/abs/1905.02244) + +## Introduction + + + + + +Official Repo + +Code Snippet + +## Abstract + + + +We present the next generation of MobileNets based on a combination of complementary search techniques as well as a novel architecture design. MobileNetV3 is tuned to mobile phone CPUs through a combination of hardware-aware network architecture search (NAS) complemented by the NetAdapt algorithm and then subsequently improved through novel architecture advances. This paper starts the exploration of how automated search algorithms and network design can work together to harness complementary approaches improving the overall state of the art. Through this process we create two new MobileNet models for release: MobileNetV3-Large and MobileNetV3-Small which are targeted for high and low resource use cases. These models are then adapted and applied to the tasks of object detection and semantic segmentation. For the task of semantic segmentation (or any dense pixel prediction), we propose a new efficient segmentation decoder Lite Reduced Atrous Spatial Pyramid Pooling (LR-ASPP). We achieve new state of the art results for mobile classification, detection and segmentation. MobileNetV3-Large is 3.2% more accurate on ImageNet classification while reducing latency by 15% compared to MobileNetV2. MobileNetV3-Small is 4.6% more accurate while reducing latency by 5% compared to MobileNetV2. MobileNetV3-Large detection is 25% faster at roughly the same accuracy as MobileNetV2 on COCO detection. MobileNetV3-Large LR-ASPP is 30% faster than MobileNetV2 R-ASPP at similar accuracy for Cityscapes segmentation. + + + +
+ +
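The LR-ASPP decoder mentioned in the abstract (and used by the configs below) boils down to a cheap 1x1-conv branch gated by a global-pooling branch. A toy sketch (illustrative only, not the mmseg `LRASPPHead`, which additionally fuses a low-level skip branch):

```python
# Toy sketch of the Lite R-ASPP idea: a 1x1-conv branch on the last feature
# map, gated by a squeeze-and-excite-like global-context branch.
import torch
import torch.nn as nn
import torch.nn.functional as F

class LiteRASPP(nn.Module):
    def __init__(self, in_ch, mid_ch, num_classes):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_ch, mid_ch, 1, bias=False),
            nn.BatchNorm2d(mid_ch),
            nn.ReLU(inplace=True))
        self.gate = nn.Sequential(          # global-context gating branch
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(in_ch, mid_ch, 1),
            nn.Sigmoid())
        self.cls = nn.Conv2d(mid_ch, num_classes, 1)

    def forward(self, x):
        y = self.conv(x) * self.gate(x)     # broadcast-multiply the gate
        y = F.interpolate(y, scale_factor=2, mode='bilinear',
                          align_corners=False)
        return self.cls(y)
```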
+ +## Citation + +```bibtex +@inproceedings{Howard_2019_ICCV, + title={Searching for MobileNetV3}, + author={Howard, Andrew and Sandler, Mark and Chu, Grace and Chen, Liang-Chieh and Chen, Bo and Tan, Mingxing and Wang, Weijun and Zhu, Yukun and Pang, Ruoming and Vasudevan, Vijay and Le, Quoc V. and Adam, Hartwig}, + booktitle={The IEEE International Conference on Computer Vision (ICCV)}, + pages={1314-1324}, + month={October}, + year={2019}, + doi={10.1109/ICCV.2019.00140} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------: | -------------- | ----: | ------------- | --------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| LRASPP | M-V3-D8 | 512x1024 | 320000 | 8.9 | 15.22 | 69.54 | 70.89 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes/lraspp_m-v3-d8_512x1024_320k_cityscapes_20201224_220337-cfe8fb07.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes/lraspp_m-v3-d8_512x1024_320k_cityscapes-20201224_220337.log.json) | +| LRASPP | M-V3-D8 (scratch) | 512x1024 | 320000 | 8.9 | 14.77 | 67.87 | 69.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes_20201224_220337-9f29cd72.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes-20201224_220337.log.json) | +| LRASPP | M-V3s-D8 | 512x1024 | 320000 | 5.3 | 23.64 | 64.11 | 66.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v3/lraspp_m-v3s-d8_512x1024_320k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_512x1024_320k_cityscapes/lraspp_m-v3s-d8_512x1024_320k_cityscapes_20201224_223935-61565b34.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_512x1024_320k_cityscapes/lraspp_m-v3s-d8_512x1024_320k_cityscapes-20201224_223935.log.json) | +| LRASPP | M-V3s-D8 (scratch) | 512x1024 | 320000 | 5.3 | 24.50 | 62.74 | 65.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v3/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes_20201224_223935-03daeabb.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes-20201224_223935.log.json) | diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes.py new file mode 100644 index 0000000..e59a78b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes.py @@ -0,0 +1,11 @@ +_base_ = [ + '../_base_/models/lraspp_m-v3-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] + +model = dict(pretrained='open-mmlab://contrib/mobilenet_v3_large') + +# Re-config the data sampler. +data = dict(samples_per_gpu=4, workers_per_gpu=4) + +runner = dict(type='IterBasedRunner', max_iters=320000) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes.py new file mode 100644 index 0000000..a3c5435 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/lraspp_m-v3-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] + +# Re-config the data sampler. +data = dict(samples_per_gpu=4, workers_per_gpu=4) + +runner = dict(type='IterBasedRunner', max_iters=320000) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v3/lraspp_m-v3s-d8_512x1024_320k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v3/lraspp_m-v3s-d8_512x1024_320k_cityscapes.py new file mode 100644 index 0000000..d4e368b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v3/lraspp_m-v3s-d8_512x1024_320k_cityscapes.py @@ -0,0 +1,23 @@ +_base_ = './lraspp_m-v3-d8_512x1024_320k_cityscapes.py' +norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='open-mmlab://contrib/mobilenet_v3_small', + backbone=dict( + type='MobileNetV3', + arch='small', + out_indices=(0, 1, 12), + norm_cfg=norm_cfg), + decode_head=dict( + type='LRASPPHead', + in_channels=(16, 16, 576), + in_index=(0, 1, 2), + channels=128, + input_transform='multiple_select', + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v3/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v3/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes.py new file mode 100644 index 0000000..0c5f707 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v3/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes.py @@ -0,0 +1,22 @@ +_base_ = './lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes.py' +norm_cfg = dict(type='SyncBN', eps=0.001, requires_grad=True) +model = dict( + type='EncoderDecoder', + backbone=dict( + type='MobileNetV3', + arch='small', + out_indices=(0, 1, 
12), + norm_cfg=norm_cfg), + decode_head=dict( + type='LRASPPHead', + in_channels=(16, 16, 576), + in_index=(0, 1, 2), + channels=128, + input_transform='multiple_select', + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v3/mobilenet_v3.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v3/mobilenet_v3.yml new file mode 100644 index 0000000..003cbe5 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/mobilenet_v3/mobilenet_v3.yml @@ -0,0 +1,103 @@ +Collections: +- Name: LRASPP + Metadata: + Training Data: + - Cityscapes + Paper: + URL: https://arxiv.org/abs/1905.02244 + Title: Searching for MobileNetV3 + README: configs/mobilenet_v3/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mobilenet_v3.py#L15 + Version: v0.17.0 + Converted From: + Code: https://github.com/tensorflow/models/tree/master/research/deeplab +Models: +- Name: lraspp_m-v3-d8_512x1024_320k_cityscapes + In Collection: LRASPP + Metadata: + backbone: M-V3-D8 + crop size: (512,1024) + lr schd: 320000 + inference time (ms/im): + - value: 65.7 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 8.9 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 69.54 + mIoU(ms+flip): 70.89 + Config: configs/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_512x1024_320k_cityscapes/lraspp_m-v3-d8_512x1024_320k_cityscapes_20201224_220337-cfe8fb07.pth +- Name: lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes + In Collection: LRASPP + Metadata: + backbone: M-V3-D8 (scratch) + crop size: (512,1024) + lr schd: 320000 + inference time (ms/im): + - value: 67.7 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 8.9 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 67.87 + mIoU(ms+flip): 69.78 + Config: configs/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3-d8_scratch_512x1024_320k_cityscapes_20201224_220337-9f29cd72.pth +- Name: lraspp_m-v3s-d8_512x1024_320k_cityscapes + In Collection: LRASPP + Metadata: + backbone: M-V3s-D8 + crop size: (512,1024) + lr schd: 320000 + inference time (ms/im): + - value: 42.3 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 5.3 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 64.11 + mIoU(ms+flip): 66.42 + Config: configs/mobilenet_v3/lraspp_m-v3s-d8_512x1024_320k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_512x1024_320k_cityscapes/lraspp_m-v3s-d8_512x1024_320k_cityscapes_20201224_223935-61565b34.pth +- Name: lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes + In Collection: LRASPP + Metadata: + backbone: M-V3s-D8 (scratch) + crop size: (512,1024) + lr schd: 320000 + inference time (ms/im): + - value: 40.82 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory 
(GB): 5.3 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 62.74 + mIoU(ms+flip): 65.01 + Config: configs/mobilenet_v3/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/mobilenet_v3/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes/lraspp_m-v3s-d8_scratch_512x1024_320k_cityscapes_20201224_223935-03daeabb.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/README.md new file mode 100644 index 0000000..1109599 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/README.md @@ -0,0 +1,68 @@ +# NonLocal Net + +[Non-local Neural Networks](https://arxiv.org/abs/1711.07971) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Both convolutional and recurrent operations are building blocks that process one local neighborhood at a time. In this paper, we present non-local operations as a generic family of building blocks for capturing long-range dependencies. Inspired by the classical non-local means method in computer vision, our non-local operation computes the response at a position as a weighted sum of the features at all positions. This building block can be plugged into many computer vision architectures. On the task of video classification, even without any bells and whistles, our non-local models can compete or outperform current competition winners on both Kinetics and Charades datasets. In static image recognition, our non-local models improve object detection/segmentation and pose estimation on the COCO suite of tasks. Code is available at [this https URL](https://github.com/facebookresearch/video-nonlocal-net). + + + +
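The core operation in the abstract above — responding at each position with a weighted sum of the features at all positions — is compact enough to sketch. Below is a minimal PyTorch sketch of a 2D non-local block in its embedded-Gaussian form; the layer names, the channel-reduction factor, and the toy shapes are illustrative assumptions, not the exact mmsegmentation `NLHead` implementation.

```python
# Minimal non-local block sketch (embedded-Gaussian form), written from the
# abstract above; names and sizes are illustrative, not the mmseg NLHead code.
import torch
import torch.nn as nn


class NonLocal2d(nn.Module):
    def __init__(self, in_channels, reduction=2):
        super().__init__()
        inter = in_channels // reduction
        self.theta = nn.Conv2d(in_channels, inter, 1)  # query embedding
        self.phi = nn.Conv2d(in_channels, inter, 1)    # key embedding
        self.g = nn.Conv2d(in_channels, inter, 1)      # value embedding
        # Final projection back to in_channels; the paper zero-initializes it
        # so a freshly inserted block starts out as an identity mapping.
        self.out = nn.Conv2d(inter, in_channels, 1)

    def forward(self, x):
        n, _, h, w = x.shape
        q = self.theta(x).flatten(2).transpose(1, 2)   # (N, HW, C')
        k = self.phi(x).flatten(2)                     # (N, C', HW)
        v = self.g(x).flatten(2).transpose(1, 2)       # (N, HW, C')
        attn = torch.softmax(q @ k, dim=-1)            # weights over ALL positions
        y = (attn @ v).transpose(1, 2).reshape(n, -1, h, w)
        return x + self.out(y)                         # residual: pluggable block


x = torch.randn(1, 64, 32, 32)
print(NonLocal2d(64)(x).shape)  # torch.Size([1, 64, 32, 32])
```

The residual connection is what makes the block "pluggable into many computer vision architectures", as the abstract puts it: inserting it leaves the host network's behavior unchanged at initialization.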
+ +## Citation + +```bibtex +@inproceedings{wang2018non, + title={Non-local neural networks}, + author={Wang, Xiaolong and Girshick, Ross and Gupta, Abhinav and He, Kaiming}, + booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, + pages={7794--7803}, + year={2018} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ----------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------- | ----------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| NonLocalNet | R-50-D8 | 512x1024 | 40000 | 7.4 | 2.72 | 78.24 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes/nonlocal_r50-d8_512x1024_40k_cityscapes_20200605_210748-c75e81e3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes/nonlocal_r50-d8_512x1024_40k_cityscapes_20200605_210748.log.json) | +| NonLocalNet | R-101-D8 | 512x1024 | 40000 | 10.9 | 1.95 | 78.66 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes/nonlocal_r101-d8_512x1024_40k_cityscapes_20200605_210748-d63729fa.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes/nonlocal_r101-d8_512x1024_40k_cityscapes_20200605_210748.log.json) | +| NonLocalNet | R-50-D8 | 769x769 | 40000 | 8.9 | 1.52 | 78.33 | 79.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes/nonlocal_r50-d8_769x769_40k_cityscapes_20200530_045243-82ef6749.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes/nonlocal_r50-d8_769x769_40k_cityscapes_20200530_045243.log.json) | +| NonLocalNet | R-101-D8 | 769x769 | 40000 | 12.8 | 1.05 | 78.57 | 80.29 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes/nonlocal_r101-d8_769x769_40k_cityscapes_20200530_045348-8fe9a9dc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes/nonlocal_r101-d8_769x769_40k_cityscapes_20200530_045348.log.json) | +| NonLocalNet | R-50-D8 | 512x1024 | 80000 | - | - | 78.01 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes/nonlocal_r50-d8_512x1024_80k_cityscapes_20200607_193518-d6839fae.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes/nonlocal_r50-d8_512x1024_80k_cityscapes_20200607_193518.log.json) | +| NonLocalNet | R-101-D8 | 512x1024 | 80000 | - | - | 78.93 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes/nonlocal_r101-d8_512x1024_80k_cityscapes_20200607_183411-32700183.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes/nonlocal_r101-d8_512x1024_80k_cityscapes_20200607_183411.log.json) | +| NonLocalNet | R-50-D8 | 769x769 | 80000 | - | - | 79.05 | 80.68 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes/nonlocal_r50-d8_769x769_80k_cityscapes_20200607_193506-1f9792f6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes/nonlocal_r50-d8_769x769_80k_cityscapes_20200607_193506.log.json) | +| NonLocalNet | R-101-D8 | 769x769 | 80000 | - | - | 79.40 | 80.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes/nonlocal_r101-d8_769x769_80k_cityscapes_20200607_183428-0e1fa4f9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes/nonlocal_r101-d8_769x769_80k_cityscapes_20200607_183428.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ----------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| NonLocalNet | R-50-D8 | 512x512 | 80000 | 9.1 | 21.37 | 40.75 | 42.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k/nonlocal_r50-d8_512x512_80k_ade20k_20200615_015801-5ae0aa33.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k/nonlocal_r50-d8_512x512_80k_ade20k_20200615_015801.log.json) | +| NonLocalNet | R-101-D8 | 512x512 | 80000 | 12.6 | 13.97 | 42.90 | 44.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k/nonlocal_r101-d8_512x512_80k_ade20k_20200615_015758-24105919.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k/nonlocal_r101-d8_512x512_80k_ade20k_20200615_015758.log.json) | +| NonLocalNet | R-50-D8 | 512x512 | 160000 | - | - | 42.03 | 43.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k/nonlocal_r50-d8_512x512_160k_ade20k_20200616_005410-baef45e3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k/nonlocal_r50-d8_512x512_160k_ade20k_20200616_005410.log.json) | +| NonLocalNet | R-101-D8 | 512x512 | 160000 | - | - | 44.63 | 45.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k/nonlocal_r101-d8_512x512_160k_ade20k_20210827_221502-7881aa1a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k/nonlocal_r101-d8_512x512_160k_ade20k_20210827_221502.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ----------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| NonLocalNet | R-50-D8 | 512x512 | 20000 | 6.4 | 21.21 | 76.20 | 77.12 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug/nonlocal_r50-d8_512x512_20k_voc12aug_20200617_222613-07f2a57c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug/nonlocal_r50-d8_512x512_20k_voc12aug_20200617_222613.log.json) | +| NonLocalNet | R-101-D8 | 512x512 | 20000 | 9.8 | 14.01 | 78.15 | 78.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug/nonlocal_r101-d8_512x512_20k_voc12aug_20200617_222615-948c68ab.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug/nonlocal_r101-d8_512x512_20k_voc12aug_20200617_222615.log.json) | +| NonLocalNet | R-50-D8 | 512x512 | 40000 | - | - | 76.65 | 77.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug/nonlocal_r50-d8_512x512_40k_voc12aug_20200614_000028-0139d4a9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug/nonlocal_r50-d8_512x512_40k_voc12aug_20200614_000028.log.json) | +| NonLocalNet | R-101-D8 | 512x512 | 40000 | - | - | 78.27 | 79.12 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug/nonlocal_r101-d8_512x512_40k_voc12aug_20200614_000028-7e5ff470.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug/nonlocal_r101-d8_512x512_40k_voc12aug_20200614_000028.log.json) | diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_net.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_net.yml new file mode 100644 index 0000000..bab38ce --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_net.yml @@ -0,0 +1,301 @@ +Collections: +- Name: NonLocalNet + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + Paper: + URL: https://arxiv.org/abs/1711.07971 + Title: Non-local Neural Networks + README: configs/nonlocal_net/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/nl_head.py#L10 + Version: v0.17.0 + Converted From: + Code: https://github.com/facebookresearch/video-nonlocal-net +Models: +- Name: nonlocal_r50-d8_512x1024_40k_cityscapes + In Collection: NonLocalNet + Metadata: + backbone: R-50-D8 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 367.65 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 7.4 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.24 + Config: configs/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes/nonlocal_r50-d8_512x1024_40k_cityscapes_20200605_210748-c75e81e3.pth +- Name: nonlocal_r101-d8_512x1024_40k_cityscapes + In Collection: NonLocalNet + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 512.82 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 10.9 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.66 + Config: configs/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes/nonlocal_r101-d8_512x1024_40k_cityscapes_20200605_210748-d63729fa.pth +- Name: nonlocal_r50-d8_769x769_40k_cityscapes + In Collection: NonLocalNet + Metadata: + backbone: R-50-D8 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - value: 657.89 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 8.9 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.33 + mIoU(ms+flip): 79.92 + Config: configs/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes.py + 
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes/nonlocal_r50-d8_769x769_40k_cityscapes_20200530_045243-82ef6749.pth +- Name: nonlocal_r101-d8_769x769_40k_cityscapes + In Collection: NonLocalNet + Metadata: + backbone: R-101-D8 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - value: 952.38 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 12.8 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.57 + mIoU(ms+flip): 80.29 + Config: configs/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes/nonlocal_r101-d8_769x769_40k_cityscapes_20200530_045348-8fe9a9dc.pth +- Name: nonlocal_r50-d8_512x1024_80k_cityscapes + In Collection: NonLocalNet + Metadata: + backbone: R-50-D8 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.01 + Config: configs/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes/nonlocal_r50-d8_512x1024_80k_cityscapes_20200607_193518-d6839fae.pth +- Name: nonlocal_r101-d8_512x1024_80k_cityscapes + In Collection: NonLocalNet + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.93 + Config: configs/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes/nonlocal_r101-d8_512x1024_80k_cityscapes_20200607_183411-32700183.pth +- Name: nonlocal_r50-d8_769x769_80k_cityscapes + In Collection: NonLocalNet + Metadata: + backbone: R-50-D8 + crop size: (769,769) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.05 + mIoU(ms+flip): 80.68 + Config: configs/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes/nonlocal_r50-d8_769x769_80k_cityscapes_20200607_193506-1f9792f6.pth +- Name: nonlocal_r101-d8_769x769_80k_cityscapes + In Collection: NonLocalNet + Metadata: + backbone: R-101-D8 + crop size: (769,769) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.4 + mIoU(ms+flip): 80.85 + Config: configs/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes/nonlocal_r101-d8_769x769_80k_cityscapes_20200607_183428-0e1fa4f9.pth +- Name: nonlocal_r50-d8_512x512_80k_ade20k + In Collection: NonLocalNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 46.79 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.1 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 40.75 + mIoU(ms+flip): 42.05 + Config: configs/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k/nonlocal_r50-d8_512x512_80k_ade20k_20200615_015801-5ae0aa33.pth +- Name: nonlocal_r101-d8_512x512_80k_ade20k + In Collection: NonLocalNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 71.58 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 12.6 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.9 + mIoU(ms+flip): 44.27 + Config: configs/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k/nonlocal_r101-d8_512x512_80k_ade20k_20200615_015758-24105919.pth +- Name: nonlocal_r50-d8_512x512_160k_ade20k + In Collection: NonLocalNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.03 + mIoU(ms+flip): 43.04 + Config: configs/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k/nonlocal_r50-d8_512x512_160k_ade20k_20200616_005410-baef45e3.pth +- Name: nonlocal_r101-d8_512x512_160k_ade20k + In Collection: NonLocalNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 44.63 + mIoU(ms+flip): 45.79 + Config: configs/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k/nonlocal_r101-d8_512x512_160k_ade20k_20210827_221502-7881aa1a.pth +- Name: nonlocal_r50-d8_512x512_20k_voc12aug + In Collection: NonLocalNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 47.15 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 6.4 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.2 + mIoU(ms+flip): 77.12 + Config: configs/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug/nonlocal_r50-d8_512x512_20k_voc12aug_20200617_222613-07f2a57c.pth +- Name: nonlocal_r101-d8_512x512_20k_voc12aug + In Collection: NonLocalNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 71.38 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.8 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 78.15 + mIoU(ms+flip): 78.86 + Config: configs/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug/nonlocal_r101-d8_512x512_20k_voc12aug_20200617_222615-948c68ab.pth +- Name: nonlocal_r50-d8_512x512_40k_voc12aug + In Collection: NonLocalNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.65 + mIoU(ms+flip): 77.47 + Config: configs/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug/nonlocal_r50-d8_512x512_40k_voc12aug_20200614_000028-0139d4a9.pth +- Name: nonlocal_r101-d8_512x512_40k_voc12aug + In Collection: NonLocalNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 78.27 + mIoU(ms+flip): 79.12 + Config: configs/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug/nonlocal_r101-d8_512x512_40k_voc12aug_20200614_000028-7e5ff470.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..ef7b06d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './nonlocal_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..7a1e66c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './nonlocal_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..df9c2ac --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './nonlocal_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000..490f987 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './nonlocal_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000..40d9190 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './nonlocal_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git 
a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..0c6f60d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './nonlocal_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000..23e6da7 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './nonlocal_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..0627e2b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './nonlocal_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..9d4dc73 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/nonlocal_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..b0672b6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/nonlocal_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..b1adfba --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/nonlocal_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git 
a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000..2e808d8 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/nonlocal_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000..66b443a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/nonlocal_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..8a7a2f5 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/nonlocal_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000..75adef3 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/nonlocal_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..a0726c2 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/nonlocal_net/nonlocal_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/nonlocal_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff 
--git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/README.md new file mode 100644 index 0000000..1c3dba2 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/README.md @@ -0,0 +1,89 @@ +# OCRNet + +[Object-Contextual Representations for Semantic Segmentation](https://arxiv.org/abs/1909.11065) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +In this paper, we address the problem of semantic segmentation and focus on the context aggregation strategy for robust segmentation. Our motivation is that the label of a pixel is the category of the object that the pixel belongs to. We present a simple yet effective approach, object-contextual representations, characterizing a pixel by exploiting the representation of the corresponding object class. First, we construct object regions based on a feature map supervised by the ground-truth segmentation, and then compute the object region representations. Second, we compute the representation similarity between each pixel and each object region, and augment the representation of each pixel with an object contextual representation, which is a weighted aggregation of all the object region representations according to their similarities with the pixel. We empirically demonstrate that the proposed approach achieves competitive performance on six challenging semantic segmentation benchmarks: Cityscapes, ADE20K, LIP, PASCAL VOC 2012, PASCAL-Context and COCO-Stuff. Notably, we achieved the 2nd place on the Cityscapes leader-board with a single model. + + +
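The two-step aggregation described in the abstract — pool pixel features into per-object region vectors, then augment each pixel with a similarity-weighted sum of those vectors — can be sketched in a few lines. The following is a hedged PyTorch sketch under simplified assumptions: the `ocr_augment` name, the shapes, and the omission of the 1x1 projection/transform layers are illustrative, not the exact mmsegmentation `OCRHead` code.

```python
# Hedged sketch of object-contextual aggregation: soft object regions pool
# pixel features into K region vectors, then each pixel is augmented with a
# similarity-weighted sum of them. Shapes/names illustrative, not OCRHead.
import torch
import torch.nn.functional as F


def ocr_augment(feats, region_logits):
    """feats: (N, C, H, W) pixel features; region_logits: (N, K, H, W)."""
    n, c, h, w = feats.shape
    pix = feats.flatten(2)                             # (N, C, HW)
    regions = F.softmax(region_logits.flatten(2), -1)  # (N, K, HW) soft masks
    region_feats = regions @ pix.transpose(1, 2)       # (N, K, C) object reps
    # pixel-region similarity, normalized over the K object regions
    sim = F.softmax(pix.transpose(1, 2) @ region_feats.transpose(1, 2), -1)
    context = (sim @ region_feats).transpose(1, 2).reshape(n, c, h, w)
    return torch.cat([feats, context], dim=1)          # (N, 2C, H, W)


out = ocr_augment(torch.randn(2, 64, 16, 16), torch.randn(2, 19, 16, 16))
print(out.shape)  # torch.Size([2, 128, 16, 16])
```

In the full model, the region logits come from an auxiliary segmentation head supervised by the ground truth — the "feature map supervised by the ground-truth segmentation" in the abstract — rather than being arbitrary inputs as in this toy call.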
+ +## Citation + +```bibtex +@article{YuanW18, + title={Ocnet: Object context network for scene parsing}, + author={Yuhui Yuan and Jingdong Wang}, + booktitle={arXiv preprint arXiv:1809.00916}, + year={2018} +} + +@article{YuanCW20, + title={Object-Contextual Representations for Semantic Segmentation}, + author={Yuhui Yuan and Xilin Chen and Jingdong Wang}, + booktitle={ECCV}, + year={2020} +} +``` + +## Results and models + +### Cityscapes + +#### HRNet backbone + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| OCRNet | HRNetV2p-W18-Small | 512x1024 | 40000 | 3.5 | 10.45 | 74.30 | 75.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes/ocrnet_hr18s_512x1024_40k_cityscapes_20200601_033304-fa2436c2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes/ocrnet_hr18s_512x1024_40k_cityscapes_20200601_033304.log.json) | +| OCRNet | HRNetV2p-W18 | 512x1024 | 40000 | 4.7 | 7.50 | 77.72 | 79.49 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes/ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320-401c5bdd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes/ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320.log.json) | +| OCRNet | HRNetV2p-W48 | 512x1024 | 40000 | 8 | 4.22 | 80.58 | 81.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes/ocrnet_hr48_512x1024_40k_cityscapes_20200601_033336-55b32491.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes/ocrnet_hr48_512x1024_40k_cityscapes_20200601_033336.log.json) | +| OCRNet | HRNetV2p-W18-Small | 512x1024 | 80000 | - | - | 77.16 | 78.66 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes/ocrnet_hr18s_512x1024_80k_cityscapes_20200601_222735-55979e63.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes/ocrnet_hr18s_512x1024_80k_cityscapes_20200601_222735.log.json) | +| OCRNet | HRNetV2p-W18 | 512x1024 | 80000 | - | - | 78.57 | 80.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes/ocrnet_hr18_512x1024_80k_cityscapes_20200614_230521-c2e1dd4a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes/ocrnet_hr18_512x1024_80k_cityscapes_20200614_230521.log.json) | +| OCRNet | HRNetV2p-W48 | 512x1024 | 80000 | - | - | 80.70 | 81.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes/ocrnet_hr48_512x1024_80k_cityscapes_20200601_222752-9076bcdf.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes/ocrnet_hr48_512x1024_80k_cityscapes_20200601_222752.log.json) | +| OCRNet | HRNetV2p-W18-Small | 512x1024 | 160000 | - | - | 78.45 | 79.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes/ocrnet_hr18s_512x1024_160k_cityscapes_20200602_191005-f4a7af28.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes/ocrnet_hr18s_512x1024_160k_cityscapes_20200602_191005.log.json) | +| OCRNet | HRNetV2p-W18 | 512x1024 | 160000 | - | - | 79.47 | 80.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001-b9172d0c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001.log.json) | +| OCRNet | HRNetV2p-W48 | 512x1024 | 160000 | - | - | 81.35 | 82.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037-dfbf1b0c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037.log.json) | + +#### ResNet backbone + +| Method | Backbone | Crop Size | Batch Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ---------- | ------- | -------- | -------------- | ----- | ------------: | ------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| OCRNet | R-101-D8 | 512x1024 | 8 | 40000 | - | - | 80.09 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes/ocrnet_r101-d8_512x1024_40k_b8_cityscapes_20200717_110721-02ac0f13.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes/ocrnet_r101-d8_512x1024_40k_b8_cityscapes_20200717_110721.log.json) | +| OCRNet | R-101-D8 | 512x1024 | 16 | 40000 | 8.8 | 3.02 | 80.30 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes/ocrnet_r101-d8_512x1024_40k_b16_cityscapes_20200723_193726-db500f80.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes/ocrnet_r101-d8_512x1024_40k_b16_cityscapes_20200723_193726.log.json) | +| OCRNet | R-101-D8 | 512x1024 | 16 | 80000 | 8.8 | 3.02 | 80.81 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes/ocrnet_r101-d8_512x1024_80k_b16_cityscapes_20200723_192421-78688424.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes/ocrnet_r101-d8_512x1024_80k_b16_cityscapes_20200723_192421.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| OCRNet | HRNetV2p-W18-Small | 512x512 | 80000 | 6.7 | 28.98 | 35.06 | 35.80 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr18s_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_80k_ade20k/ocrnet_hr18s_512x512_80k_ade20k_20200615_055600-e80b62af.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_80k_ade20k/ocrnet_hr18s_512x512_80k_ade20k_20200615_055600.log.json) | +| OCRNet | HRNetV2p-W18 | 512x512 | 80000 | 7.9 | 18.93 | 37.79 | 39.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr18_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_80k_ade20k/ocrnet_hr18_512x512_80k_ade20k_20200615_053157-d173d83b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_80k_ade20k/ocrnet_hr18_512x512_80k_ade20k_20200615_053157.log.json) | +| OCRNet | HRNetV2p-W48 | 512x512 | 80000 | 11.2 | 16.99 | 43.00 | 44.30 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr48_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_80k_ade20k/ocrnet_hr48_512x512_80k_ade20k_20200615_021518-d168c2d1.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_80k_ade20k/ocrnet_hr48_512x512_80k_ade20k_20200615_021518.log.json) | +| OCRNet | HRNetV2p-W18-Small | 512x512 | 160000 | - | - | 37.19 | 38.40 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr18s_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_160k_ade20k/ocrnet_hr18s_512x512_160k_ade20k_20200615_184505-8e913058.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_160k_ade20k/ocrnet_hr18s_512x512_160k_ade20k_20200615_184505.log.json) | +| OCRNet | HRNetV2p-W18 | 512x512 | 160000 | - | - | 39.32 | 40.80 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr18_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_160k_ade20k/ocrnet_hr18_512x512_160k_ade20k_20200615_200940-d8fcd9d1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_160k_ade20k/ocrnet_hr18_512x512_160k_ade20k_20200615_200940.log.json) | +| OCRNet | HRNetV2p-W48 | 512x512 | 160000 | - | - | 43.25 | 44.88 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr48_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_160k_ade20k/ocrnet_hr48_512x512_160k_ade20k_20200615_184705-a073726d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_160k_ade20k/ocrnet_hr48_512x512_160k_ade20k_20200615_184705.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | ------------------ | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| OCRNet | HRNetV2p-W18-Small | 512x512 | 20000 | 3.5 | 31.55 | 71.70 | 73.84 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug/ocrnet_hr18s_512x512_20k_voc12aug_20200617_233913-02b04fcb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug/ocrnet_hr18s_512x512_20k_voc12aug_20200617_233913.log.json) | +| OCRNet | HRNetV2p-W18 | 512x512 | 20000 | 4.7 | 19.91 | 74.75 | 77.11 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr18_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_20k_voc12aug/ocrnet_hr18_512x512_20k_voc12aug_20200617_233932-8954cbb7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_20k_voc12aug/ocrnet_hr18_512x512_20k_voc12aug_20200617_233932.log.json) | +| OCRNet | HRNetV2p-W48 | 512x512 | 20000 | 8.1 | 17.83 | 77.72 | 79.87 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr48_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_20k_voc12aug/ocrnet_hr48_512x512_20k_voc12aug_20200617_233932-9e82080a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_20k_voc12aug/ocrnet_hr48_512x512_20k_voc12aug_20200617_233932.log.json) | +| OCRNet | HRNetV2p-W18-Small | 512x512 | 40000 | - | - | 72.76 | 74.60 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug/ocrnet_hr18s_512x512_40k_voc12aug_20200614_002025-42b587ac.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug/ocrnet_hr18s_512x512_40k_voc12aug_20200614_002025.log.json) | +| OCRNet | HRNetV2p-W18 | 512x512 | 40000 | - | - | 74.98 | 77.40 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr18_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_40k_voc12aug/ocrnet_hr18_512x512_40k_voc12aug_20200614_015958-714302be.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_40k_voc12aug/ocrnet_hr18_512x512_40k_voc12aug_20200614_015958.log.json) | +| OCRNet | HRNetV2p-W48 | 512x512 | 40000 | - | - | 77.14 | 79.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet/ocrnet_hr48_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_40k_voc12aug/ocrnet_hr48_512x512_40k_voc12aug_20200614_015958-255bc5ce.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_40k_voc12aug/ocrnet_hr48_512x512_40k_voc12aug_20200614_015958.log.json) | diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet.yml new file mode 100644 index 0000000..d599f0a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet.yml @@ -0,0 +1,438 @@ +Collections: +- Name: OCRNet + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + Paper: + URL: https://arxiv.org/abs/1909.11065 + Title: Object-Contextual Representations for Semantic Segmentation + README: configs/ocrnet/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/ocr_head.py#L86 + Version: v0.17.0 + Converted From: + Code: https://github.com/openseg-group/OCNet.pytorch +Models: +- Name: ocrnet_hr18s_512x1024_40k_cityscapes + In Collection: OCRNet + Metadata: + backbone: HRNetV2p-W18-Small + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 95.69 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 3.5 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.3 + mIoU(ms+flip): 75.95 + Config: configs/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes/ocrnet_hr18s_512x1024_40k_cityscapes_20200601_033304-fa2436c2.pth +- Name: ocrnet_hr18_512x1024_40k_cityscapes + In Collection: OCRNet + Metadata: + backbone: 
HRNetV2p-W18 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 133.33 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 4.7 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.72 + mIoU(ms+flip): 79.49 + Config: configs/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes/ocrnet_hr18_512x1024_40k_cityscapes_20200601_033320-401c5bdd.pth +- Name: ocrnet_hr48_512x1024_40k_cityscapes + In Collection: OCRNet + Metadata: + backbone: HRNetV2p-W48 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 236.97 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 8.0 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.58 + mIoU(ms+flip): 81.79 + Config: configs/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes/ocrnet_hr48_512x1024_40k_cityscapes_20200601_033336-55b32491.pth +- Name: ocrnet_hr18s_512x1024_80k_cityscapes + In Collection: OCRNet + Metadata: + backbone: HRNetV2p-W18-Small + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.16 + mIoU(ms+flip): 78.66 + Config: configs/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes/ocrnet_hr18s_512x1024_80k_cityscapes_20200601_222735-55979e63.pth +- Name: ocrnet_hr18_512x1024_80k_cityscapes + In Collection: OCRNet + Metadata: + backbone: HRNetV2p-W18 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.57 + mIoU(ms+flip): 80.46 + Config: configs/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes/ocrnet_hr18_512x1024_80k_cityscapes_20200614_230521-c2e1dd4a.pth +- Name: ocrnet_hr48_512x1024_80k_cityscapes + In Collection: OCRNet + Metadata: + backbone: HRNetV2p-W48 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.7 + mIoU(ms+flip): 81.87 + Config: configs/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes/ocrnet_hr48_512x1024_80k_cityscapes_20200601_222752-9076bcdf.pth +- Name: ocrnet_hr18s_512x1024_160k_cityscapes + In Collection: OCRNet + Metadata: + backbone: HRNetV2p-W18-Small + crop size: (512,1024) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.45 + mIoU(ms+flip): 79.97 + Config: configs/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes/ocrnet_hr18s_512x1024_160k_cityscapes_20200602_191005-f4a7af28.pth +- Name: ocrnet_hr18_512x1024_160k_cityscapes + In Collection: OCRNet + Metadata: + backbone: HRNetV2p-W18 + crop size: (512,1024) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.47 + mIoU(ms+flip): 80.91 + Config: configs/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes.py + 
Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes/ocrnet_hr18_512x1024_160k_cityscapes_20200602_191001-b9172d0c.pth +- Name: ocrnet_hr48_512x1024_160k_cityscapes + In Collection: OCRNet + Metadata: + backbone: HRNetV2p-W48 + crop size: (512,1024) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 81.35 + mIoU(ms+flip): 82.7 + Config: configs/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes/ocrnet_hr48_512x1024_160k_cityscapes_20200602_191037-dfbf1b0c.pth +- Name: ocrnet_r101-d8_512x1024_40k_b8_cityscapes + In Collection: OCRNet + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.09 + Config: configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes/ocrnet_r101-d8_512x1024_40k_b8_cityscapes_20200717_110721-02ac0f13.pth +- Name: ocrnet_r101-d8_512x1024_40k_b16_cityscapes + In Collection: OCRNet + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 331.13 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 8.8 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.3 + Config: configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes/ocrnet_r101-d8_512x1024_40k_b16_cityscapes_20200723_193726-db500f80.pth +- Name: ocrnet_r101-d8_512x1024_80k_b16_cityscapes + In Collection: OCRNet + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 331.13 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 8.8 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.81 + Config: configs/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes/ocrnet_r101-d8_512x1024_80k_b16_cityscapes_20200723_192421-78688424.pth +- Name: ocrnet_hr18s_512x512_80k_ade20k + In Collection: OCRNet + Metadata: + backbone: HRNetV2p-W18-Small + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 34.51 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 6.7 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 35.06 + mIoU(ms+flip): 35.8 + Config: configs/ocrnet/ocrnet_hr18s_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_80k_ade20k/ocrnet_hr18s_512x512_80k_ade20k_20200615_055600-e80b62af.pth +- Name: ocrnet_hr18_512x512_80k_ade20k + In Collection: OCRNet + Metadata: + backbone: HRNetV2p-W18 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 52.83 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 7.9 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 37.79 + mIoU(ms+flip): 39.16 + Config: 
configs/ocrnet/ocrnet_hr18_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_80k_ade20k/ocrnet_hr18_512x512_80k_ade20k_20200615_053157-d173d83b.pth +- Name: ocrnet_hr48_512x512_80k_ade20k + In Collection: OCRNet + Metadata: + backbone: HRNetV2p-W48 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 58.86 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 11.2 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.0 + mIoU(ms+flip): 44.3 + Config: configs/ocrnet/ocrnet_hr48_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_80k_ade20k/ocrnet_hr48_512x512_80k_ade20k_20200615_021518-d168c2d1.pth +- Name: ocrnet_hr18s_512x512_160k_ade20k + In Collection: OCRNet + Metadata: + backbone: HRNetV2p-W18-Small + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 37.19 + mIoU(ms+flip): 38.4 + Config: configs/ocrnet/ocrnet_hr18s_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_160k_ade20k/ocrnet_hr18s_512x512_160k_ade20k_20200615_184505-8e913058.pth +- Name: ocrnet_hr18_512x512_160k_ade20k + In Collection: OCRNet + Metadata: + backbone: HRNetV2p-W18 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 39.32 + mIoU(ms+flip): 40.8 + Config: configs/ocrnet/ocrnet_hr18_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_160k_ade20k/ocrnet_hr18_512x512_160k_ade20k_20200615_200940-d8fcd9d1.pth +- Name: ocrnet_hr48_512x512_160k_ade20k + In Collection: OCRNet + Metadata: + backbone: HRNetV2p-W48 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.25 + mIoU(ms+flip): 44.88 + Config: configs/ocrnet/ocrnet_hr48_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_160k_ade20k/ocrnet_hr48_512x512_160k_ade20k_20200615_184705-a073726d.pth +- Name: ocrnet_hr18s_512x512_20k_voc12aug + In Collection: OCRNet + Metadata: + backbone: HRNetV2p-W18-Small + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 31.7 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 3.5 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 71.7 + mIoU(ms+flip): 73.84 + Config: configs/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug/ocrnet_hr18s_512x512_20k_voc12aug_20200617_233913-02b04fcb.pth +- Name: ocrnet_hr18_512x512_20k_voc12aug + In Collection: OCRNet + Metadata: + backbone: HRNetV2p-W18 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 50.23 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 4.7 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 74.75 + mIoU(ms+flip): 77.11 + Config: configs/ocrnet/ocrnet_hr18_512x512_20k_voc12aug.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_20k_voc12aug/ocrnet_hr18_512x512_20k_voc12aug_20200617_233932-8954cbb7.pth +- Name: ocrnet_hr48_512x512_20k_voc12aug + In Collection: OCRNet + Metadata: + backbone: HRNetV2p-W48 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 56.09 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 8.1 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.72 + mIoU(ms+flip): 79.87 + Config: configs/ocrnet/ocrnet_hr48_512x512_20k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_20k_voc12aug/ocrnet_hr48_512x512_20k_voc12aug_20200617_233932-9e82080a.pth +- Name: ocrnet_hr18s_512x512_40k_voc12aug + In Collection: OCRNet + Metadata: + backbone: HRNetV2p-W18-Small + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 72.76 + mIoU(ms+flip): 74.6 + Config: configs/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug/ocrnet_hr18s_512x512_40k_voc12aug_20200614_002025-42b587ac.pth +- Name: ocrnet_hr18_512x512_40k_voc12aug + In Collection: OCRNet + Metadata: + backbone: HRNetV2p-W18 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 74.98 + mIoU(ms+flip): 77.4 + Config: configs/ocrnet/ocrnet_hr18_512x512_40k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr18_512x512_40k_voc12aug/ocrnet_hr18_512x512_40k_voc12aug_20200614_015958-714302be.pth +- Name: ocrnet_hr48_512x512_40k_voc12aug + In Collection: OCRNet + Metadata: + backbone: HRNetV2p-W48 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.14 + mIoU(ms+flip): 79.71 + Config: configs/ocrnet/ocrnet_hr48_512x512_40k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/ocrnet/ocrnet_hr48_512x512_40k_voc12aug/ocrnet_hr48_512x512_40k_voc12aug_20200614_015958-255bc5ce.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes.py new file mode 100644 index 0000000..1c86eba --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18_512x1024_160k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/ocrnet_hr18.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..2c73b38 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/ocrnet_hr18.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes.py 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..506ad93 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/ocrnet_hr18.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18_512x512_160k_ade20k.py new file mode 100644 index 0000000..a3c86e1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18_512x512_160k_ade20k.py @@ -0,0 +1,35 @@ +_base_ = [ + '../_base_/models/ocrnet_hr18.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict(decode_head=[ + dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + channels=sum([18, 36, 72, 144]), + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + channels=512, + ocr_channels=256, + dropout_ratio=-1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), +]) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18_512x512_20k_voc12aug.py new file mode 100644 index 0000000..ab9d644 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18_512x512_20k_voc12aug.py @@ -0,0 +1,36 @@ +_base_ = [ + '../_base_/models/ocrnet_hr18.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict(decode_head=[ + dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + channels=sum([18, 36, 72, 144]), + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=21, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + channels=512, + ocr_channels=256, + dropout_ratio=-1, + num_classes=21, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), +]) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18_512x512_40k_voc12aug.py new file mode 100644 index 0000000..df79a9c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18_512x512_40k_voc12aug.py @@ -0,0 +1,36 @@ +_base_ = [ + '../_base_/models/ocrnet_hr18.py', + 
'../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict(decode_head=[ + dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + channels=sum([18, 36, 72, 144]), + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=21, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + channels=512, + ocr_channels=256, + dropout_ratio=-1, + num_classes=21, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), +]) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18_512x512_80k_ade20k.py new file mode 100644 index 0000000..6ad6772 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18_512x512_80k_ade20k.py @@ -0,0 +1,35 @@ +_base_ = [ + '../_base_/models/ocrnet_hr18.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict(decode_head=[ + dict( + type='FCNHead', + in_channels=[18, 36, 72, 144], + channels=sum([18, 36, 72, 144]), + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + kernel_size=1, + num_convs=1, + concat_input=False, + dropout_ratio=-1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[18, 36, 72, 144], + in_index=(0, 1, 2, 3), + input_transform='resize_concat', + channels=512, + ocr_channels=256, + dropout_ratio=-1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), +]) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes.py new file mode 100644 index 0000000..fc79097 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18s_512x1024_160k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './ocrnet_hr18_512x1024_160k_cityscapes.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..923731f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './ocrnet_hr18_512x1024_40k_cityscapes.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + 
stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..be6bf16 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18s_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './ocrnet_hr18_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18s_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18s_512x512_160k_ade20k.py new file mode 100644 index 0000000..81f3d5c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18s_512x512_160k_ade20k.py @@ -0,0 +1,9 @@ +_base_ = './ocrnet_hr18_512x512_160k_ade20k.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug.py new file mode 100644 index 0000000..ceb9448 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18s_512x512_20k_voc12aug.py @@ -0,0 +1,9 @@ +_base_ = './ocrnet_hr18_512x512_20k_voc12aug.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug.py new file mode 100644 index 0000000..70babc9 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18s_512x512_40k_voc12aug.py @@ -0,0 +1,9 @@ +_base_ = './ocrnet_hr18_512x512_40k_voc12aug.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18s_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18s_512x512_80k_ade20k.py new file mode 100644 index 0000000..36e7721 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr18s_512x512_80k_ade20k.py @@ -0,0 +1,9 @@ +_base_ = './ocrnet_hr18_512x512_80k_ade20k.py' +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w18_small', + backbone=dict( + extra=dict( + 
stage1=dict(num_blocks=(2, )), + stage2=dict(num_blocks=(2, 2)), + stage3=dict(num_modules=3, num_blocks=(2, 2, 2)), + stage4=dict(num_modules=2, num_blocks=(2, 2, 2, 2))))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes.py new file mode 100644 index 0000000..c094391 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr48_512x1024_160k_cityscapes.py @@ -0,0 +1,39 @@ +_base_ = './ocrnet_hr18_512x1024_160k_cityscapes.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[48, 96, 192, 384], + channels=sum([48, 96, 192, 384]), + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + kernel_size=1, + num_convs=1, + norm_cfg=norm_cfg, + concat_input=False, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[48, 96, 192, 384], + channels=512, + ocr_channels=256, + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ]) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..0aada9d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr48_512x1024_40k_cityscapes.py @@ -0,0 +1,39 @@ +_base_ = './ocrnet_hr18_512x1024_40k_cityscapes.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[48, 96, 192, 384], + channels=sum([48, 96, 192, 384]), + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + kernel_size=1, + num_convs=1, + norm_cfg=norm_cfg, + concat_input=False, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[48, 96, 192, 384], + channels=512, + ocr_channels=256, + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ]) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..1b2e009 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr48_512x1024_80k_cityscapes.py @@ -0,0 +1,39 @@ +_base_ = './ocrnet_hr18_512x1024_80k_cityscapes.py' +norm_cfg = 
dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[48, 96, 192, 384], + channels=sum([48, 96, 192, 384]), + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + kernel_size=1, + num_convs=1, + norm_cfg=norm_cfg, + concat_input=False, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[48, 96, 192, 384], + channels=512, + ocr_channels=256, + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + dropout_ratio=-1, + num_classes=19, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ]) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr48_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr48_512x512_160k_ade20k.py new file mode 100644 index 0000000..3b3e8af --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr48_512x512_160k_ade20k.py @@ -0,0 +1,39 @@ +_base_ = './ocrnet_hr18_512x512_160k_ade20k.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[48, 96, 192, 384], + channels=sum([48, 96, 192, 384]), + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + kernel_size=1, + num_convs=1, + norm_cfg=norm_cfg, + concat_input=False, + dropout_ratio=-1, + num_classes=150, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[48, 96, 192, 384], + channels=512, + ocr_channels=256, + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + dropout_ratio=-1, + num_classes=150, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ]) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr48_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr48_512x512_20k_voc12aug.py new file mode 100644 index 0000000..c2dd6d1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr48_512x512_20k_voc12aug.py @@ -0,0 +1,39 @@ +_base_ = './ocrnet_hr18_512x512_20k_voc12aug.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[48, 96, 192, 384], + channels=sum([48, 96, 192, 384]), + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + kernel_size=1, + num_convs=1, + norm_cfg=norm_cfg, + concat_input=False, + dropout_ratio=-1, + num_classes=21, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + 
in_channels=[48, 96, 192, 384], + channels=512, + ocr_channels=256, + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + dropout_ratio=-1, + num_classes=21, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ]) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr48_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr48_512x512_40k_voc12aug.py new file mode 100644 index 0000000..89e6309 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr48_512x512_40k_voc12aug.py @@ -0,0 +1,39 @@ +_base_ = './ocrnet_hr18_512x512_40k_voc12aug.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[48, 96, 192, 384], + channels=sum([48, 96, 192, 384]), + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + kernel_size=1, + num_convs=1, + norm_cfg=norm_cfg, + concat_input=False, + dropout_ratio=-1, + num_classes=21, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[48, 96, 192, 384], + channels=512, + ocr_channels=256, + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + dropout_ratio=-1, + num_classes=21, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ]) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr48_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr48_512x512_80k_ade20k.py new file mode 100644 index 0000000..0497122 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_hr48_512x512_80k_ade20k.py @@ -0,0 +1,39 @@ +_base_ = './ocrnet_hr18_512x512_80k_ade20k.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w48', + backbone=dict( + extra=dict( + stage2=dict(num_channels=(48, 96)), + stage3=dict(num_channels=(48, 96, 192)), + stage4=dict(num_channels=(48, 96, 192, 384)))), + decode_head=[ + dict( + type='FCNHead', + in_channels=[48, 96, 192, 384], + channels=sum([48, 96, 192, 384]), + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + kernel_size=1, + num_convs=1, + norm_cfg=norm_cfg, + concat_input=False, + dropout_ratio=-1, + num_classes=150, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='OCRHead', + in_channels=[48, 96, 192, 384], + channels=512, + ocr_channels=256, + input_transform='resize_concat', + in_index=(0, 1, 2, 3), + norm_cfg=norm_cfg, + dropout_ratio=-1, + num_classes=150, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + ]) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes.py new file mode 100644 index 0000000..3dd70b7 --- /dev/null +++ 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b16_cityscapes.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/ocrnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) +optimizer = dict(lr=0.02) +lr_config = dict(min_lr=2e-4) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes.py new file mode 100644 index 0000000..e34f343 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_r101-d8_512x1024_40k_b8_cityscapes.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/ocrnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes.py new file mode 100644 index 0000000..33d96c7 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/ocrnet/ocrnet_r101-d8_512x1024_80k_b16_cityscapes.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/ocrnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) +optimizer = dict(lr=0.02) +lr_config = dict(min_lr=2e-4) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/point_rend/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/point_rend/README.md new file mode 100644 index 0000000..2644f46 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/point_rend/README.md @@ -0,0 +1,51 @@ +# PointRend + +[PointRend: Image Segmentation as Rendering](https://arxiv.org/abs/1912.08193) + +## Introduction + + + +[Official Repo](https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend) + +[Code Snippet](https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/point_head.py#L36) + +## Abstract + + + +We present a new method for efficient high-quality image segmentation of objects and scenes. By analogizing classical computer graphics methods for efficient rendering with over- and undersampling challenges faced in pixel labeling tasks, we develop a unique perspective of image segmentation as a rendering problem. From this vantage, we present the PointRend (Point-based Rendering) neural network module: a module that performs point-based segmentation predictions at adaptively selected locations based on an iterative subdivision algorithm. PointRend can be flexibly applied to both instance and semantic segmentation tasks by building on top of existing state-of-the-art models. While many concrete implementations of the general idea are possible, we show that a simple design already achieves excellent results. Qualitatively, PointRend outputs crisp object boundaries in regions that are over-smoothed by previous methods. Quantitatively, PointRend yields significant gains on COCO and Cityscapes, for both instance and semantic segmentation. PointRend's efficiency enables output resolutions that are otherwise impractical in terms of memory or computation compared to existing approaches.
Code has been made available at [this https URL](https://github.com/facebookresearch/detectron2/tree/main/projects/PointRend). + + + +
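As a minimal, hedged sketch of the iterative subdivision step the abstract describes (this is not mmsegmentation's `PointHead`; `refine_once`, `point_head`, and `fine_feats` are hypothetical names introduced for illustration, with a 1x1 `Conv1d` standing in for the real point-wise MLP), one refinement step upsamples the coarse logits, ranks pixels by how close their top two class scores are, and re-classifies only the most uncertain points:

```python
# Sketch of one PointRend-style refinement step (PyTorch assumed).
import torch
import torch.nn.functional as F


def top2_uncertainty(logits):
    # Uncertainty as (second-best - best) class score per pixel.
    # Values are <= 0; the largest ones mark the least confident pixels.
    top2 = logits.topk(2, dim=1).values           # (N, 2, H, W)
    return top2[:, 1] - top2[:, 0]                # (N, H, W)


def refine_once(coarse_logits, point_head, fine_feats, num_points=1024):
    # 2x bilinear upsample, then re-predict only the most uncertain pixels.
    # fine_feats must already be at the upsampled resolution.
    logits = F.interpolate(coarse_logits, scale_factor=2,
                           mode='bilinear', align_corners=False)
    n, c, _, _ = logits.shape
    unc = top2_uncertainty(logits).view(n, -1)    # (N, H*W)
    idx = unc.topk(num_points, dim=1).indices     # most uncertain locations
    feats = fine_feats.flatten(2)                 # (N, C_f, H*W)
    point_feats = feats.gather(
        2, idx.unsqueeze(1).expand(-1, feats.size(1), -1))   # (N, C_f, P)
    point_logits = point_head(point_feats)        # (N, num_classes, P)
    # Overwrite only the selected points; every other pixel keeps the
    # bilinearly upsampled prediction.
    logits.view(n, c, -1).scatter_(
        2, idx.unsqueeze(1).expand(-1, c, -1), point_logits)
    return logits


coarse = torch.randn(1, 19, 64, 128)              # e.g. 19 Cityscapes classes
fine = torch.randn(1, 256, 128, 256)              # features at the 2x resolution
head = torch.nn.Conv1d(256, 19, kernel_size=1)    # stand-in for the point MLP
print(refine_once(coarse, head, fine).shape)      # torch.Size([1, 19, 128, 256])
```

Repeating the step doubles the output resolution while the head only ever evaluates `num_points` locations per step, which is where the memory and computation savings claimed above come from; the real module additionally samples sub-pixel coordinates via grid sampling and mixes in random points during training.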
+ +
+ +## Citation + +```bibtex +@inproceedings{kirillov2020pointrend, + title={Pointrend: Image segmentation as rendering}, + author={Kirillov, Alexander and Wu, Yuxin and He, Kaiming and Girshick, Ross}, + booktitle={Proceedings of the IEEE/CVF conference on computer vision and pattern recognition}, + pages={9799--9808}, + year={2020} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PointRend | R-50 | 512x1024 | 80000 | 3.1 | 8.48 | 76.47 | 78.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/point_rend/pointrend_r50_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x1024_80k_cityscapes/pointrend_r50_512x1024_80k_cityscapes_20200711_015821-bb1ff523.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x1024_80k_cityscapes/pointrend_r50_512x1024_80k_cityscapes-20200715_214714.log.json) | +| PointRend | R-101 | 512x1024 | 80000 | 4.2 | 7.00 | 78.30 | 79.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/point_rend/pointrend_r101_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x1024_80k_cityscapes/pointrend_r101_512x1024_80k_cityscapes_20200711_170850-d0ca84be.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x1024_80k_cityscapes/pointrend_r101_512x1024_80k_cityscapes-20200715_214824.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | --------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| PointRend | R-50 | 512x512 | 160000 | 5.1 | 17.31 | 37.64 | 39.17 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/point_rend/pointrend_r50_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x512_160k_ade20k/pointrend_r50_512x512_160k_ade20k_20200807_232644-ac3febf2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x512_160k_ade20k/pointrend_r50_512x512_160k_ade20k-20200807_232644.log.json) | +| PointRend | R-101 | 512x512 | 160000 | 6.1 | 15.50 | 40.02 | 41.60 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/point_rend/pointrend_r101_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x512_160k_ade20k/pointrend_r101_512x512_160k_ade20k_20200808_030852-8834902a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x512_160k_ade20k/pointrend_r101_512x512_160k_ade20k-20200808_030852.log.json) | diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/point_rend/point_rend.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/point_rend/point_rend.yml new file mode 100644 index 0000000..3abe81d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/point_rend/point_rend.yml @@ -0,0 +1,104 @@ +Collections: +- Name: PointRend + Metadata: + Training Data: + - Cityscapes + - ADE20K + Paper: + URL: https://arxiv.org/abs/1912.08193 + Title: 'PointRend: Image Segmentation as Rendering' + README: configs/point_rend/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/point_head.py#L36 + Version: v0.17.0 + Converted From: + Code: https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend +Models: +- Name: pointrend_r50_512x1024_80k_cityscapes + In Collection: PointRend + Metadata: + backbone: R-50 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 117.92 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 3.1 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.47 + mIoU(ms+flip): 78.13 + Config: configs/point_rend/pointrend_r50_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x1024_80k_cityscapes/pointrend_r50_512x1024_80k_cityscapes_20200711_015821-bb1ff523.pth +- Name: pointrend_r101_512x1024_80k_cityscapes + In Collection: PointRend + Metadata: + backbone: R-101 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 142.86 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 4.2 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.3 + mIoU(ms+flip): 79.97 + Config: configs/point_rend/pointrend_r101_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x1024_80k_cityscapes/pointrend_r101_512x1024_80k_cityscapes_20200711_170850-d0ca84be.pth +- Name: pointrend_r50_512x512_160k_ade20k + In Collection: PointRend + Metadata: + backbone: R-50 + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 57.77 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 5.1 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 37.64 + mIoU(ms+flip): 39.17 + Config: configs/point_rend/pointrend_r50_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r50_512x512_160k_ade20k/pointrend_r50_512x512_160k_ade20k_20200807_232644-ac3febf2.pth +- Name: pointrend_r101_512x512_160k_ade20k + In Collection: PointRend + Metadata: + backbone: R-101 + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 64.52 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training 
Memory (GB): 6.1 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 40.02 + mIoU(ms+flip): 41.6 + Config: configs/point_rend/pointrend_r101_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/point_rend/pointrend_r101_512x512_160k_ade20k/pointrend_r101_512x512_160k_ade20k_20200808_030852-8834902a.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/point_rend/pointrend_r101_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/point_rend/pointrend_r101_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..a8c14c8 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/point_rend/pointrend_r101_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './pointrend_r50_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/point_rend/pointrend_r101_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/point_rend/pointrend_r101_512x512_160k_ade20k.py new file mode 100644 index 0000000..4d1f8c8 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/point_rend/pointrend_r101_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './pointrend_r50_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/point_rend/pointrend_r50_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/point_rend/pointrend_r50_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..96cbaa4 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/point_rend/pointrend_r50_512x1024_80k_cityscapes.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/pointrend_r50.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +lr_config = dict(warmup='linear', warmup_iters=200) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/point_rend/pointrend_r50_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/point_rend/pointrend_r50_512x512_160k_ade20k.py new file mode 100644 index 0000000..db8c634 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/point_rend/pointrend_r50_512x512_160k_ade20k.py @@ -0,0 +1,32 @@ +_base_ = [ + '../_base_/models/pointrend_r50.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict(decode_head=[ + dict( + type='FPNHead', + in_channels=[256, 256, 256, 256], + in_index=[0, 1, 2, 3], + feature_strides=[4, 8, 16, 32], + channels=128, + dropout_ratio=-1, + num_classes=150, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + dict( + type='PointHead', + in_channels=[256], + in_index=[0], + channels=256, + num_fcs=3, + coarse_pred_each_layer=True, + dropout_ratio=-1, + num_classes=150, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) +]) +lr_config = dict(warmup='linear', warmup_iters=200) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/README.md new file mode 100644 index 
0000000..9f307b2 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/README.md @@ -0,0 +1,68 @@ +# PSANet + +[PSANet: Point-wise Spatial Attention Network for Scene Parsing](https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf) + +## Introduction + + + +[Official Repo](https://github.com/hszhao/PSANet) + +[Code Snippet](https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18) + +## Abstract + + + +We notice information flow in convolutional neural networks is restricted inside local neighborhood regions due to the physical design of convolutional filters, which limits the overall understanding of complex scenes. In this paper, we propose the point-wise spatial attention network (PSANet) to relax the local neighborhood constraint. Each position on the feature map is connected to all the other ones through a self-adaptively learned attention mask. Moreover, information propagation in bi-direction for scene parsing is enabled. Information at other positions can be collected to help the prediction of the current position and vice versa, information at the current position can be distributed to assist the prediction of other ones. Our proposed approach achieves top performance on various competitive scene parsing datasets, including ADE20K, PASCAL VOC 2012 and Cityscapes, demonstrating its effectiveness and generality. + + + +
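As a rough, hedged illustration of the "self-adaptively learned attention mask" above (a simplified sketch, not mmsegmentation's `PSAHead`; the paper predicts an over-complete (2H-1)x(2W-1) mask and pairs this "collect" flow with a "distribute" counterpart), the collect branch reduces to every position predicting, from its own features alone, a weight for every other position and aggregating accordingly:

```python
# Minimal sketch of a PSANet-style "collect" branch (PyTorch assumed).
# Unlike dot-product/non-local attention, the mask is predicted directly
# from the features by a 1x1 conv. Baking (h, w) into the layer, and the
# h*w output channels, keep this practical only at toy feature-map sizes.
import torch
import torch.nn as nn
import torch.nn.functional as F


class PointwiseCollect(nn.Module):
    def __init__(self, channels, h, w):
        super().__init__()
        # One output channel per spatial position: pixel i predicts a
        # weight for every source position j.
        self.mask_proj = nn.Conv2d(channels, h * w, kernel_size=1)

    def forward(self, x):                               # x: (N, C, H, W)
        n, c, h, w = x.shape
        attn = self.mask_proj(x).view(n, h * w, h * w)  # (N, HW_src, HW_dst)
        attn = F.softmax(attn, dim=1)                   # normalize over sources
        feats = x.view(n, c, h * w)                     # (N, C, HW)
        out = torch.bmm(feats, attn)  # out[..., i] = sum_j w_ji * feats[..., j]
        return out.view(n, c, h, w)


x = torch.randn(2, 64, 16, 16)
print(PointwiseCollect(64, 16, 16)(x).shape)            # torch.Size([2, 64, 16, 16])
```

The distribute branch is the transposed flow (each position pushes its own feature out to all others), and PSANet combines both directions before the final classifier, which is the "bi-direction" information propagation the abstract refers to.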
+ +
+ +## Citation + +```bibtex +@inproceedings{zhao2018psanet, + title={Psanet: Point-wise spatial attention network for scene parsing}, + author={Zhao, Hengshuang and Zhang, Yi and Liu, Shu and Shi, Jianping and Change Loy, Chen and Lin, Dahua and Jia, Jiaya}, + booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, + pages={267--283}, + year={2018} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSANet | R-50-D8 | 512x1024 | 40000 | 7 | 3.17 | 77.63 | 79.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r50-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_40k_cityscapes/psanet_r50-d8_512x1024_40k_cityscapes_20200606_103117-99fac37c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_40k_cityscapes/psanet_r50-d8_512x1024_40k_cityscapes_20200606_103117.log.json) | +| PSANet | R-101-D8 | 512x1024 | 40000 | 10.5 | 2.20 | 79.14 | 80.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r101-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_40k_cityscapes/psanet_r101-d8_512x1024_40k_cityscapes_20200606_001418-27b9cfa7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_40k_cityscapes/psanet_r101-d8_512x1024_40k_cityscapes_20200606_001418.log.json) | +| PSANet | R-50-D8 | 769x769 | 40000 | 7.9 | 1.40 | 77.99 | 79.64 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r50-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_40k_cityscapes/psanet_r50-d8_769x769_40k_cityscapes_20200530_033717-d5365506.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_40k_cityscapes/psanet_r50-d8_769x769_40k_cityscapes_20200530_033717.log.json) | +| PSANet | R-101-D8 | 769x769 | 40000 | 11.9 | 0.98 | 78.43 | 80.26 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r101-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_40k_cityscapes/psanet_r101-d8_769x769_40k_cityscapes_20200530_035107-997da1e6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_40k_cityscapes/psanet_r101-d8_769x769_40k_cityscapes_20200530_035107.log.json) | +| PSANet | R-50-D8 | 512x1024 | 80000 | - | - | 77.24 | 78.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r50-d8_512x1024_80k_cityscapes.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_80k_cityscapes/psanet_r50-d8_512x1024_80k_cityscapes_20200606_161842-ab60a24f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_80k_cityscapes/psanet_r50-d8_512x1024_80k_cityscapes_20200606_161842.log.json) | +| PSANet | R-101-D8 | 512x1024 | 80000 | - | - | 79.31 | 80.53 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_80k_cityscapes/psanet_r101-d8_512x1024_80k_cityscapes_20200606_161823-0f73a169.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_80k_cityscapes/psanet_r101-d8_512x1024_80k_cityscapes_20200606_161823.log.json) | +| PSANet | R-50-D8 | 769x769 | 80000 | - | - | 79.31 | 80.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r50-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_80k_cityscapes/psanet_r50-d8_769x769_80k_cityscapes_20200606_225134-fe42f49e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_80k_cityscapes/psanet_r50-d8_769x769_80k_cityscapes_20200606_225134.log.json) | +| PSANet | R-101-D8 | 769x769 | 80000 | - | - | 79.69 | 80.89 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r101-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_80k_cityscapes/psanet_r101-d8_769x769_80k_cityscapes_20200606_214550-7665827b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_80k_cityscapes/psanet_r101-d8_769x769_80k_cityscapes_20200606_214550.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSANet | R-50-D8 | 512x512 | 80000 | 9 | 18.91 | 41.14 | 41.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r50-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_80k_ade20k/psanet_r50-d8_512x512_80k_ade20k_20200614_144141-835e4b97.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_80k_ade20k/psanet_r50-d8_512x512_80k_ade20k_20200614_144141.log.json) | +| PSANet | R-101-D8 | 512x512 | 80000 | 12.5 | 13.13 | 43.80 | 44.75 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r101-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_80k_ade20k/psanet_r101-d8_512x512_80k_ade20k_20200614_185117-1fab60d4.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_80k_ade20k/psanet_r101-d8_512x512_80k_ade20k_20200614_185117.log.json) | +| PSANet | R-50-D8 | 512x512 | 160000 | - | - | 41.67 | 42.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r50-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_160k_ade20k/psanet_r50-d8_512x512_160k_ade20k_20200615_161258-148077dd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_160k_ade20k/psanet_r50-d8_512x512_160k_ade20k_20200615_161258.log.json) | +| PSANet | R-101-D8 | 512x512 | 160000 | - | - | 43.74 | 45.38 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_160k_ade20k/psanet_r101-d8_512x512_160k_ade20k_20200615_161537-dbfa564c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_160k_ade20k/psanet_r101-d8_512x512_160k_ade20k_20200615_161537.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSANet | R-50-D8 | 512x512 | 20000 | 6.9 | 18.24 | 76.39 | 77.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r50-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_20k_voc12aug/psanet_r50-d8_512x512_20k_voc12aug_20200617_102413-2f1bbaa1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_20k_voc12aug/psanet_r50-d8_512x512_20k_voc12aug_20200617_102413.log.json) | +| PSANet | R-101-D8 | 512x512 | 20000 | 10.4 | 12.63 | 77.91 | 79.30 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r101-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_20k_voc12aug/psanet_r101-d8_512x512_20k_voc12aug_20200617_110624-946fef11.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_20k_voc12aug/psanet_r101-d8_512x512_20k_voc12aug_20200617_110624.log.json) | +| PSANet | R-50-D8 | 512x512 | 40000 | - | - | 76.30 | 77.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r50-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_40k_voc12aug/psanet_r50-d8_512x512_40k_voc12aug_20200613_161946-f596afb5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_40k_voc12aug/psanet_r50-d8_512x512_40k_voc12aug_20200613_161946.log.json) | +| PSANet | R-101-D8 | 512x512 | 40000 | - | - | 77.73 | 79.05 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet/psanet_r101-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_40k_voc12aug/psanet_r101-d8_512x512_40k_voc12aug_20200613_161946-1f560f9e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_40k_voc12aug/psanet_r101-d8_512x512_40k_voc12aug_20200613_161946.log.json) | diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet.yml new file mode 100644 index 0000000..353c890 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet.yml @@ -0,0 +1,305 @@ +Collections: +- Name: PSANet + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + Paper: + URL: https://openaccess.thecvf.com/content_ECCV_2018/papers/Hengshuang_Zhao_PSANet_Point-wise_Spatial_ECCV_2018_paper.pdf + Title: 'PSANet: Point-wise Spatial Attention Network for Scene Parsing' + README: configs/psanet/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psa_head.py#L18 + Version: v0.17.0 + Converted From: + Code: https://github.com/hszhao/PSANet +Models: +- Name: psanet_r50-d8_512x1024_40k_cityscapes + In Collection: PSANet + Metadata: + backbone: R-50-D8 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 315.46 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 7.0 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.63 + mIoU(ms+flip): 79.04 + Config: configs/psanet/psanet_r50-d8_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_40k_cityscapes/psanet_r50-d8_512x1024_40k_cityscapes_20200606_103117-99fac37c.pth +- Name: psanet_r101-d8_512x1024_40k_cityscapes + In Collection: PSANet + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 454.55 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 10.5 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.14 + mIoU(ms+flip): 80.19 + Config: configs/psanet/psanet_r101-d8_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_40k_cityscapes/psanet_r101-d8_512x1024_40k_cityscapes_20200606_001418-27b9cfa7.pth +- Name: psanet_r50-d8_769x769_40k_cityscapes + In Collection: PSANet + Metadata: + backbone: R-50-D8 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - value: 714.29 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 7.9 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.99 + mIoU(ms+flip): 79.64 + Config: configs/psanet/psanet_r50-d8_769x769_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_40k_cityscapes/psanet_r50-d8_769x769_40k_cityscapes_20200530_033717-d5365506.pth +- Name: psanet_r101-d8_769x769_40k_cityscapes + In Collection: PSANet + Metadata: + backbone: R-101-D8 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - value: 1020.41 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: 
FP32 + resolution: (769,769) + Training Memory (GB): 11.9 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.43 + mIoU(ms+flip): 80.26 + Config: configs/psanet/psanet_r101-d8_769x769_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_40k_cityscapes/psanet_r101-d8_769x769_40k_cityscapes_20200530_035107-997da1e6.pth +- Name: psanet_r50-d8_512x1024_80k_cityscapes + In Collection: PSANet + Metadata: + backbone: R-50-D8 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.24 + mIoU(ms+flip): 78.69 + Config: configs/psanet/psanet_r50-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x1024_80k_cityscapes/psanet_r50-d8_512x1024_80k_cityscapes_20200606_161842-ab60a24f.pth +- Name: psanet_r101-d8_512x1024_80k_cityscapes + In Collection: PSANet + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.31 + mIoU(ms+flip): 80.53 + Config: configs/psanet/psanet_r101-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x1024_80k_cityscapes/psanet_r101-d8_512x1024_80k_cityscapes_20200606_161823-0f73a169.pth +- Name: psanet_r50-d8_769x769_80k_cityscapes + In Collection: PSANet + Metadata: + backbone: R-50-D8 + crop size: (769,769) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.31 + mIoU(ms+flip): 80.91 + Config: configs/psanet/psanet_r50-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_769x769_80k_cityscapes/psanet_r50-d8_769x769_80k_cityscapes_20200606_225134-fe42f49e.pth +- Name: psanet_r101-d8_769x769_80k_cityscapes + In Collection: PSANet + Metadata: + backbone: R-101-D8 + crop size: (769,769) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.69 + mIoU(ms+flip): 80.89 + Config: configs/psanet/psanet_r101-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_769x769_80k_cityscapes/psanet_r101-d8_769x769_80k_cityscapes_20200606_214550-7665827b.pth +- Name: psanet_r50-d8_512x512_80k_ade20k + In Collection: PSANet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 52.88 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.0 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.14 + mIoU(ms+flip): 41.91 + Config: configs/psanet/psanet_r50-d8_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_80k_ade20k/psanet_r50-d8_512x512_80k_ade20k_20200614_144141-835e4b97.pth +- Name: psanet_r101-d8_512x512_80k_ade20k + In Collection: PSANet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 76.16 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 12.5 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.8 + mIoU(ms+flip): 44.75 + Config: configs/psanet/psanet_r101-d8_512x512_80k_ade20k.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_80k_ade20k/psanet_r101-d8_512x512_80k_ade20k_20200614_185117-1fab60d4.pth +- Name: psanet_r50-d8_512x512_160k_ade20k + In Collection: PSANet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.67 + mIoU(ms+flip): 42.95 + Config: configs/psanet/psanet_r50-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_160k_ade20k/psanet_r50-d8_512x512_160k_ade20k_20200615_161258-148077dd.pth +- Name: psanet_r101-d8_512x512_160k_ade20k + In Collection: PSANet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.74 + mIoU(ms+flip): 45.38 + Config: configs/psanet/psanet_r101-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_160k_ade20k/psanet_r101-d8_512x512_160k_ade20k_20200615_161537-dbfa564c.pth +- Name: psanet_r50-d8_512x512_20k_voc12aug + In Collection: PSANet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 54.82 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 6.9 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.39 + mIoU(ms+flip): 77.34 + Config: configs/psanet/psanet_r50-d8_512x512_20k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_20k_voc12aug/psanet_r50-d8_512x512_20k_voc12aug_20200617_102413-2f1bbaa1.pth +- Name: psanet_r101-d8_512x512_20k_voc12aug + In Collection: PSANet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 79.18 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 10.4 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.91 + mIoU(ms+flip): 79.3 + Config: configs/psanet/psanet_r101-d8_512x512_20k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_20k_voc12aug/psanet_r101-d8_512x512_20k_voc12aug_20200617_110624-946fef11.pth +- Name: psanet_r50-d8_512x512_40k_voc12aug + In Collection: PSANet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.3 + mIoU(ms+flip): 77.35 + Config: configs/psanet/psanet_r50-d8_512x512_40k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r50-d8_512x512_40k_voc12aug/psanet_r50-d8_512x512_40k_voc12aug_20200613_161946-f596afb5.pth +- Name: psanet_r101-d8_512x512_40k_voc12aug + In Collection: PSANet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.73 + mIoU(ms+flip): 79.05 + Config: configs/psanet/psanet_r101-d8_512x512_40k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/psanet/psanet_r101-d8_512x512_40k_voc12aug/psanet_r101-d8_512x512_40k_voc12aug_20200613_161946-1f560f9e.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r101-d8_512x1024_40k_cityscapes.py 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..69d212f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r101-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..bc25d6a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r101-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..7f6795e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r101-d8_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000..1a3c434 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r101-d8_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000..f62eef9 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r101-d8_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..f8865a7 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r101-d8_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000..ffc99f0 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_769x769_40k_cityscapes.py' +model = 
dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r101-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..6a9efc5 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './psanet_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r50-d8_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..6671fcb --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r50-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..a441013 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r50-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..9c6364e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(mask_size=(66, 66), num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r50-d8_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000..af06cb6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r50-d8_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000..803c42d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', 
'../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r50-d8_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..0141a6d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(mask_size=(66, 66), num_classes=150), + auxiliary_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r50-d8_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000..690f8b5 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r50-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..0966b47 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/psanet/psanet_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/psanet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/README.md new file mode 100644 index 0000000..a871110 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/README.md @@ -0,0 +1,177 @@ +# PSPNet + +[Pyramid Scene Parsing Network](https://arxiv.org/abs/1612.01105) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Scene parsing is challenging for unrestricted open vocabulary and diverse scenes. In this paper, we exploit the capability of global context information by different-region-based context aggregation through our pyramid pooling module together with the proposed pyramid scene parsing network (PSPNet). Our global prior representation is effective to produce good quality results on the scene parsing task, while PSPNet provides a superior framework for pixel-level prediction tasks. The proposed approach achieves state-of-the-art performance on various datasets. It came first in ImageNet scene parsing challenge 2016, PASCAL VOC 2012 benchmark and Cityscapes benchmark. 
A single PSPNet yields the new record of mIoU accuracy 85.4% on PASCAL VOC 2012 and accuracy 80.2% on Cityscapes. + + +
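To make the pyramid pooling idea in the abstract above concrete, here is a minimal PyTorch sketch of a pyramid pooling module. It is an illustration only, not the vendored mmsegmentation `PSPHead`; the class name, pooling scales, and tensor shapes are assumptions chosen to mirror the R-50-D8 configs in this diff.

```python
# Minimal sketch of a pyramid pooling module (PPM) -- illustration only,
# not the mmsegmentation PSPHead. Class name and shapes are hypothetical.
import torch
import torch.nn as nn
import torch.nn.functional as F


class PyramidPooling(nn.Module):
    """Pool the input at several grid sizes, project each pooled map with a
    1x1 conv, upsample back to the input size, then concatenate everything."""

    def __init__(self, in_channels: int, pool_scales=(1, 2, 3, 6)):
        super().__init__()
        branch_channels = in_channels // len(pool_scales)
        self.branches = nn.ModuleList(
            nn.Sequential(
                nn.AdaptiveAvgPool2d(scale),  # coarse "global prior" at this scale
                nn.Conv2d(in_channels, branch_channels, kernel_size=1),
                nn.ReLU(inplace=True),
            )
            for scale in pool_scales
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        h, w = x.shape[2:]
        outs = [x]  # keep the original (local) features
        for branch in self.branches:
            pooled = branch(x)
            # upsample each pooled map back to the input resolution
            outs.append(F.interpolate(pooled, size=(h, w), mode="bilinear",
                                      align_corners=False))
        return torch.cat(outs, dim=1)  # local + multi-scale global context


# e.g. a 2048-channel stage-4 feature map from an R-50-D8 backbone
feat = torch.randn(1, 2048, 64, 128)
fused = PyramidPooling(2048)(feat)
print(fused.shape)  # torch.Size([1, 4096, 64, 128])
```

Concatenating the pooled branches with the original features is what gives the decode head both local detail and the global prior the abstract refers to.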
+ +## Citation + +```bibtex +@inproceedings{zhao2017pspnet, + title={Pyramid Scene Parsing Network}, + author={Zhao, Hengshuang and Shi, Jianping and Qi, Xiaojuan and Wang, Xiaogang and Jia, Jiaya}, + booktitle={CVPR}, + year={2017} +} +``` + +```bibtex +@article{wightman2021resnet, + title={Resnet strikes back: An improved training procedure in timm}, + author={Wightman, Ross and Touvron, Hugo and J{\'e}gou, Herv{\'e}}, + journal={arXiv preprint arXiv:2110.00476}, + year={2021} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------------- | ------------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| PSPNet | R-50-D8 | 512x1024 | 40000 | 6.1 | 4.07 | 77.85 | 79.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338.log.json) | +| PSPNet | R-101-D8 | 512x1024 | 40000 | 9.6 | 2.68 | 78.34 | 79.74 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751-467e7cf4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751.log.json) | +| PSPNet | R-50-D8 | 769x769 | 40000 | 6.9 | 1.76 | 78.26 | 79.88 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_40k_cityscapes/pspnet_r50-d8_769x769_40k_cityscapes_20200606_112725-86638686.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_40k_cityscapes/pspnet_r50-d8_769x769_40k_cityscapes_20200606_112725.log.json) | +| PSPNet | R-101-D8 | 769x769 | 40000 | 10.9 | 1.15 | 79.08 | 80.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_40k_cityscapes/pspnet_r101-d8_769x769_40k_cityscapes_20200606_112753-61c6f5be.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_40k_cityscapes/pspnet_r101-d8_769x769_40k_cityscapes_20200606_112753.log.json) | +| PSPNet | R-18-D8 | 512x1024 | 80000 | 1.7 | 15.71 | 74.87 | 76.04 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes/pspnet_r18-d8_512x1024_80k_cityscapes_20201225_021458-09ffa746.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes/pspnet_r18-d8_512x1024_80k_cityscapes-20201225_021458.log.json) | +| PSPNet | R-50-D8 | 512x1024 | 80000 | - | - | 78.55 | 79.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_512x1024_80k_cityscapes_20200606_112131-2376f12b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_512x1024_80k_cityscapes_20200606_112131.log.json) | +| PSPNet | R-50b-D8 rsb | 512x1024 | 80000 | 6.2 | 3.82 | 78.47 | 79.45 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes_20220315_123238-588c30be.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes_20220315_123238.log.json) | +| PSPNet | R-101-D8 | 512x1024 | 80000 | - | - | 79.76 | 81.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211-e1e1100f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211.log.json) | +| PSPNet (FP16) | R-101-D8 | 512x1024 | 80000 | 5.34 | 8.77 | 79.46 | - | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_fp16_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_fp16_512x1024_80k_cityscapes/pspnet_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230919-a0875e5c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_fp16_512x1024_80k_cityscapes/pspnet_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230919.log.json) | +| PSPNet | R-18-D8 | 769x769 | 80000 | 1.9 | 6.20 | 75.90 | 77.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r18-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_769x769_80k_cityscapes/pspnet_r18-d8_769x769_80k_cityscapes_20201225_021458-3deefc62.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_769x769_80k_cityscapes/pspnet_r18-d8_769x769_80k_cityscapes-20201225_021458.log.json) | +| PSPNet | R-50-D8 | 769x769 | 80000 | - | - | 79.59 | 80.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_769x769_80k_cityscapes.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_80k_cityscapes/pspnet_r50-d8_769x769_80k_cityscapes_20200606_210121-5ccf03dd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_80k_cityscapes/pspnet_r50-d8_769x769_80k_cityscapes_20200606_210121.log.json) | +| PSPNet | R-101-D8 | 769x769 | 80000 | - | - | 79.77 | 81.06 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_80k_cityscapes/pspnet_r101-d8_769x769_80k_cityscapes_20200606_225055-dba412fa.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_80k_cityscapes/pspnet_r101-d8_769x769_80k_cityscapes_20200606_225055.log.json) | +| PSPNet | R-18b-D8 | 512x1024 | 80000 | 1.5 | 16.28 | 74.23 | 75.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r18b-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_512x1024_80k_cityscapes/pspnet_r18b-d8_512x1024_80k_cityscapes_20201226_063116-26928a60.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_512x1024_80k_cityscapes/pspnet_r18b-d8_512x1024_80k_cityscapes-20201226_063116.log.json) | +| PSPNet | R-50b-D8 | 512x1024 | 80000 | 6.0 | 4.30 | 78.22 | 79.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50b-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_512x1024_80k_cityscapes/pspnet_r50b-d8_512x1024_80k_cityscapes_20201225_094315-6344287a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_512x1024_80k_cityscapes/pspnet_r50b-d8_512x1024_80k_cityscapes-20201225_094315.log.json) | +| PSPNet | R-101b-D8 | 512x1024 | 80000 | 9.5 | 2.76 | 79.69 | 80.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes_20201226_170012-3a4d38ab.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes-20201226_170012.log.json) | +| PSPNet | R-18b-D8 | 769x769 | 80000 | 1.7 | 6.41 | 74.92 | 76.90 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r18b-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_769x769_80k_cityscapes/pspnet_r18b-d8_769x769_80k_cityscapes_20201226_080942-bf98d186.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_769x769_80k_cityscapes/pspnet_r18b-d8_769x769_80k_cityscapes-20201226_080942.log.json) | +| PSPNet | R-50b-D8 | 769x769 | 80000 | 6.8 | 1.88 | 78.50 | 79.96 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50b-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_769x769_80k_cityscapes/pspnet_r50b-d8_769x769_80k_cityscapes_20201225_094316-4c643cf6.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_769x769_80k_cityscapes/pspnet_r50b-d8_769x769_80k_cityscapes-20201225_094316.log.json) | +| PSPNet | R-101b-D8 | 769x769 | 80000 | 10.8 | 1.17 | 78.87 | 80.04 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101b-d8_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_769x769_80k_cityscapes/pspnet_r101b-d8_769x769_80k_cityscapes_20201226_171823-f0e7c293.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_769x769_80k_cityscapes/pspnet_r101b-d8_769x769_80k_cityscapes-20201226_171823.log.json) | +| PSPNet | R-50-D32 | 512x1024 | 80000 | 3.0 | 15.21 | 73.88 | 76.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d32_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d32_512x1024_80k_cityscapes/pspnet_r50-d32_512x1024_80k_cityscapes_20220316_224840-9092b254.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d32_512x1024_80k_cityscapes/pspnet_r50-d32_512x1024_80k_cityscapes_20220316_224840.log.json) | +| PSPNet | R-50b-D32 rsb | 512x1024 | 80000 | 3.1 | 16.08 | 74.09 | 77.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes_20220316_141229-dd9c9610.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes_20220316_141229.log.json) | +| PSPNet | R-50b-D32 | 512x1024 | 80000 | 2.9 | 15.41 | 72.61 | 75.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50b-d32_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d32_512x1024_80k_cityscapes/pspnet_r50b-d32_512x1024_80k_cityscapes_20220311_152152-23bcaf8c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d32_512x1024_80k_cityscapes/pspnet_r50b-d32_512x1024_80k_cityscapes_20220311_152152.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSPNet | R-50-D8 | 512x512 | 80000 | 8.5 | 23.53 | 41.13 | 41.94 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_ade20k/pspnet_r50-d8_512x512_80k_ade20k_20200615_014128-15a8b914.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_ade20k/pspnet_r50-d8_512x512_80k_ade20k_20200615_014128.log.json) | +| PSPNet | R-101-D8 | 512x512 | 80000 | 12 | 15.30 | 43.57 | 44.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_ade20k/pspnet_r101-d8_512x512_80k_ade20k_20200614_031423-b6e782f0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_ade20k/pspnet_r101-d8_512x512_80k_ade20k_20200614_031423.log.json) | +| PSPNet | R-50-D8 | 512x512 | 160000 | - | - | 42.48 | 43.44 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_160k_ade20k/pspnet_r50-d8_512x512_160k_ade20k_20200615_184358-1890b0bd.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_160k_ade20k/pspnet_r50-d8_512x512_160k_ade20k_20200615_184358.log.json) | +| PSPNet | R-101-D8 | 512x512 | 160000 | - | - | 44.39 | 45.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_160k_ade20k/pspnet_r101-d8_512x512_160k_ade20k_20200615_100650-967c316f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_160k_ade20k/pspnet_r101-d8_512x512_160k_ade20k_20200615_100650.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSPNet | R-50-D8 | 512x512 | 20000 | 6.1 | 23.59 | 76.78 | 77.61 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_20k_voc12aug/pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958-ed5dfbd9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_20k_voc12aug/pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958.log.json) | +| PSPNet | R-101-D8 | 512x512 | 20000 | 9.6 | 15.02 | 78.47 | 79.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_20k_voc12aug/pspnet_r101-d8_512x512_20k_voc12aug_20200617_102003-4aef3c9a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_20k_voc12aug/pspnet_r101-d8_512x512_20k_voc12aug_20200617_102003.log.json) | +| PSPNet | R-50-D8 | 512x512 | 40000 | - | - | 77.29 | 78.48 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_40k_voc12aug/pspnet_r50-d8_512x512_40k_voc12aug_20200613_161222-ae9c1b8c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_40k_voc12aug/pspnet_r50-d8_512x512_40k_voc12aug_20200613_161222.log.json) | +| PSPNet | R-101-D8 | 512x512 | 40000 | - | - | 78.52 | 79.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_40k_voc12aug/pspnet_r101-d8_512x512_40k_voc12aug_20200613_161222-bc933b18.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_40k_voc12aug/pspnet_r101-d8_512x512_40k_voc12aug_20200613_161222.log.json) | + +### Pascal Context + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSPNet | R-101-D8 | 480x480 | 40000 | 8.8 | 9.68 | 46.60 | 47.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_480x480_40k_pascal_context.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context/pspnet_r101-d8_480x480_40k_pascal_context_20200911_211210-bf0f5d7c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context/pspnet_r101-d8_480x480_40k_pascal_context-20200911_211210.log.json) | +| PSPNet | R-101-D8 | 480x480 | 80000 | - | - | 46.03 | 47.15 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_480x480_80k_pascal_context.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context/pspnet_r101-d8_480x480_80k_pascal_context_20200911_190530-c86d6233.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context/pspnet_r101-d8_480x480_80k_pascal_context-20200911_190530.log.json) | + +### Pascal Context 59 + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 
+| PSPNet | R-101-D8 | 480x480 | 40000 | - | - | 52.02 | 53.54 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_480x480_40k_pascal_context_59.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context_59/pspnet_r101-d8_480x480_40k_pascal_context_59_20210416_114524-86d44cd4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context_59/pspnet_r101-d8_480x480_40k_pascal_context_59-20210416_114524.log.json) |
+| PSPNet | R-101-D8 | 480x480 | 80000 | - | - | 52.47 | 53.99 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_480x480_80k_pascal_context_59.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context_59/pspnet_r101-d8_480x480_80k_pascal_context_59_20210416_114418-fa6caaa2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context_59/pspnet_r101-d8_480x480_80k_pascal_context_59-20210416_114418.log.json) |
+
+### Dark Zurich and Nighttime Driving
+
+We also report evaluation results on these two datasets for the models above, which were trained on the Cityscapes training set.
+
+| Method | Backbone | Training Dataset | Test Dataset | mIoU | config | evaluation checkpoint |
+| ------ | --------- | ----------------------- | ------------------------- | ----- | ---------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| PSPNet | R-50-D8 | Cityscapes Training set | Dark Zurich | 10.91 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_512x1024_40k_dark.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338.log.json) |
+| PSPNet | R-50-D8 | Cityscapes Training set | Nighttime Driving | 23.02 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_512x1024_40k_night_driving.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338.log.json) |
+| PSPNet | R-50-D8 | Cityscapes Training set | Cityscapes Validation set | 77.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth) \|
[log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338.log.json) | +| PSPNet | R-101-D8 | Cityscapes Training set | Dark Zurich | 10.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_512x1024_40k_dark.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751-467e7cf4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751.log.json) | +| PSPNet | R-101-D8 | Cityscapes Training set | Nighttime Driving | 20.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_512x1024_40k_night_driving.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751-467e7cf4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751.log.json) | +| PSPNet | R-101-D8 | Cityscapes Training set | Cityscapes Validation set | 78.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751-467e7cf4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751.log.json) | +| PSPNet | R-101b-D8 | Cityscapes Training set | Dark Zurich | 15.54 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101b-d8_512x1024_80k_dark.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes_20201226_170012-3a4d38ab.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes-20201226_170012.log.json) | +| PSPNet | R-101b-D8 | Cityscapes Training set | Nighttime Driving | 22.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101b-d8_512x1024_80k_night_driving.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes_20201226_170012-3a4d38ab.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes-20201226_170012.log.json) | +| PSPNet | R-101b-D8 | Cityscapes Training set | Cityscapes Validation set | 79.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes_20201226_170012-3a4d38ab.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes-20201226_170012.log.json) | + +### COCO-Stuff 10k + +| Method | Backbone | Crop Size | Lr schd | 
Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSPNet | R-50-D8 | 512x512 | 20000 | 9.6 | 20.5 | 35.69 | 36.62 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k_20210820_203258-b88df27f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k_20210820_203258.log.json) | +| PSPNet | R-101-D8 | 512x512 | 20000 | 13.2 | 11.1 | 37.26 | 38.52 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k_20210820_232135-76aae482.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k_20210820_232135.log.json) | +| PSPNet | R-50-D8 | 512x512 | 40000 | - | - | 36.33 | 37.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k_20210821_030857-92e2902b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k_20210821_030857.log.json) | +| PSPNet | R-101-D8 | 512x512 | 40000 | - | - | 37.76 | 38.86 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k_20210821_014022-831aec95.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k_20210821_014022.log.json) | + +### COCO-Stuff 164k + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------------- | 
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSPNet | R-50-D8 | 512x512 | 80000 | 9.6 | 20.5 | 38.80 | 39.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k_20210707_152034-0e41b2db.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k_20210707_152034.log.json) | +| PSPNet | R-101-D8 | 512x512 | 80000 | 13.2 | 11.1 | 40.34 | 40.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k_20210707_152034-7eb41789.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k_20210707_152034.log.json) | +| PSPNet | R-50-D8 | 512x512 | 160000 | - | - | 39.64 | 39.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k_20210707_152004-51276a57.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k_20210707_152004.log.json) | +| PSPNet | R-101-D8 | 512x512 | 160000 | - | - | 41.28 | 41.66 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k_20210707_152004-4af9621b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k_20210707_152004.log.json) | +| PSPNet | R-50-D8 | 512x512 | 320000 | - | - | 40.53 | 40.75 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k_20210707_152004-be9610cc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k_20210707_152004.log.json) | +| PSPNet | R-101-D8 | 512x512 | 320000 | - | - | 41.95 | 42.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k_20210707_152004-72220c60.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k_20210707_152004.log.json) | + +### LoveDA + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| PSPNet | R-18-D8 | 512x512 | 80000 | 1.45 | 26.87 | 48.62 | 47.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r18-d8_512x512_80k_loveda.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x512_80k_loveda/pspnet_r18-d8_512x512_80k_loveda_20211105_052100-b97697f1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x512_80k_loveda/pspnet_r18-d8_512x512_80k_loveda_20211105_052100.log.json) | +| PSPNet | R-50-D8 | 512x512 | 80000 | 6.14 | 6.60 | 50.46 | 50.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_512x512_80k_loveda.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_loveda/pspnet_r50-d8_512x512_80k_loveda_20211104_155728-88610f9f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_loveda/pspnet_r50-d8_512x512_80k_loveda_20211104_155728.log.json) | +| PSPNet | R-101-D8 | 512x512 | 80000 | 9.61 | 4.58 | 51.86 | 51.34 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_512x512_80k_loveda.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_loveda/pspnet_r101-d8_512x512_80k_loveda_20211104_153212-1c06c6a8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_loveda/pspnet_r101-d8_512x512_80k_loveda_20211104_153212.log.json) | + +### Potsdam + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | --------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSPNet | R-18-D8 | 512x512 | 80000 | 1.50 | 85.12 | 77.09 | 78.30 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r18-d8_4x4_512x512_80k_potsdam.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_potsdam/pspnet_r18-d8_4x4_512x512_80k_potsdam_20211220_125612-7cd046e1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_potsdam/pspnet_r18-d8_4x4_512x512_80k_potsdam_20211220_125612.log.json) | +| PSPNet | R-50-D8 | 512x512 | 80000 | 6.14 | 30.21 | 78.12 | 78.98 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_4x4_512x512_80k_potsdam.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_potsdam/pspnet_r50-d8_4x4_512x512_80k_potsdam_20211219_043541-2dd5fe67.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_potsdam/pspnet_r50-d8_4x4_512x512_80k_potsdam_20211219_043541.log.json) | +| PSPNet | R-101-D8 | 512x512 | 80000 | 9.61 | 19.40 | 78.62 | 79.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_4x4_512x512_80k_potsdam.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_potsdam/pspnet_r101-d8_4x4_512x512_80k_potsdam_20211220_125612-aed036c4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_potsdam/pspnet_r101-d8_4x4_512x512_80k_potsdam_20211220_125612.log.json) | + +### Vaihingen + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSPNet | R-18-D8 | 512x512 | 80000 | 1.45 | 85.06 | 71.46 | 73.36 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r18-d8_4x4_512x512_80k_vaihingen.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_vaihingen/pspnet_r18-d8_4x4_512x512_80k_vaihingen_20211228_160355-52a8a6f6.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_vaihingen/pspnet_r18-d8_4x4_512x512_80k_vaihingen_20211228_160355.log.json) | +| PSPNet | R-50-D8 | 512x512 | 80000 | 6.14 | 30.29 | 72.36 | 73.75 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_4x4_512x512_80k_vaihingen.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_vaihingen/pspnet_r50-d8_4x4_512x512_80k_vaihingen_20211228_160355-382f8f5b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_vaihingen/pspnet_r50-d8_4x4_512x512_80k_vaihingen_20211228_160355.log.json) | +| PSPNet | R-101-D8 | 512x512 | 80000 | 9.61 | 19.97 | 72.61 | 74.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r101-d8_4x4_512x512_80k_vaihingen.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_vaihingen/pspnet_r101-d8_4x4_512x512_80k_vaihingen_20211231_230806-8eba0a09.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_vaihingen/pspnet_r101-d8_4x4_512x512_80k_vaihingen_20211231_230806.log.json) | + +### iSAID + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| PSPNet | R-18-D8 | 896x896 | 80000 | 4.52 | 26.91 | 60.22 | 61.25 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r18-d8_4x4_896x896_80k_isaid.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_896x896_80k_isaid/pspnet_r18-d8_4x4_896x896_80k_isaid_20220110_180526-e84c0b6a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_896x896_80k_isaid/pspnet_r18-d8_4x4_896x896_80k_isaid_20220110_180526.log.json) | +| PSPNet | R-50-D8 | 896x896 | 80000 | 16.58 | 8.88 | 65.36 | 66.48 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet/pspnet_r50-d8_4x4_896x896_80k_isaid.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_896x896_80k_isaid/pspnet_r50-d8_4x4_896x896_80k_isaid_20220110_180629-1f21dc32.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_896x896_80k_isaid/pspnet_r50-d8_4x4_896x896_80k_isaid_20220110_180629.log.json) | + +Note: + +- `FP16` means mixed precision (FP16) was adopted during training. +- `896x896` is the crop size used for the iSAID dataset, following the implementation of [PointFlow: Flowing Semantics Through Points for Aerial Image Segmentation](https://arxiv.org/pdf/2103.06564.pdf). +- `rsb` is short for 'ResNet strikes back'. +- The `b` in `R-50b` denotes ResNetV1b, the standard ResNet backbone. In MMSegmentation, the default backbone is ResNetV1c, which usually performs better on semantic segmentation tasks.
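The tables above pair each config with released weights. As a quick orientation for anyone using this vendored copy outside the usual mmsegmentation workflow, here is a minimal inference sketch against the mmsegmentation v0.x Python API (the series this copy tracks, per the v0.17.0 code links in `pspnet.yml`). The config path, checkpoint filename, and `demo.jpg` are placeholders standing in for any config/weights pair from the tables, not files added by this diff.

```python
# Minimal single-image inference sketch for a PSPNet checkpoint from the
# tables above. Assumes mmsegmentation v0.x (and its mmcv/torch deps) is
# installed; all paths below are placeholders, not files in this change.
from mmseg.apis import init_segmentor, inference_segmentor

config_file = 'configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py'
checkpoint_file = 'pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'

# Build the model and load the downloaded weights
# (pass device='cpu' when no GPU is available).
model = init_segmentor(config_file, checkpoint_file, device='cuda:0')

# Run inference; the result is a list with one per-pixel class map per image.
result = inference_segmentor(model, 'demo.jpg')

# Blend the prediction over the input image and save the visualization.
model.show_result('demo.jpg', result, out_file='result.jpg', opacity=0.5)
```

Note that the 1.x releases of mmsegmentation renamed these entry points (`init_model` / `inference_model`), so the sketch applies only to the 0.x series this README documents. The `pspnet_r101-*` and `pspnet_r18-*` stubs added below also illustrate the library's `_base_` config inheritance: each small file only overrides the backbone depth, pretrained weights, and (for R-18) head channel widths of its R-50 counterpart.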
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet.yml new file mode 100644 index 0000000..abbaf6c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet.yml @@ -0,0 +1,1077 @@ +Collections: +- Name: PSPNet + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + - Pascal Context + - Pascal Context 59 + - Dark Zurich and Nighttime Driving + - COCO-Stuff 10k + - COCO-Stuff 164k + - LoveDA + - Potsdam + - Vaihingen + - iSAID + Paper: + URL: https://arxiv.org/abs/1612.01105 + Title: Pyramid Scene Parsing Network + README: configs/pspnet/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/psp_head.py#L63 + Version: v0.17.0 + Converted From: + Code: https://github.com/hszhao/PSPNet +Models: +- Name: pspnet_r50-d8_512x1024_40k_cityscapes + In Collection: PSPNet + Metadata: + backbone: R-50-D8 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 245.7 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 6.1 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.85 + mIoU(ms+flip): 79.18 + Config: configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth +- Name: pspnet_r101-d8_512x1024_40k_cityscapes + In Collection: PSPNet + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 373.13 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 9.6 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.34 + mIoU(ms+flip): 79.74 + Config: configs/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes/pspnet_r101-d8_512x1024_40k_cityscapes_20200604_232751-467e7cf4.pth +- Name: pspnet_r50-d8_769x769_40k_cityscapes + In Collection: PSPNet + Metadata: + backbone: R-50-D8 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - value: 568.18 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 6.9 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.26 + mIoU(ms+flip): 79.88 + Config: configs/pspnet/pspnet_r50-d8_769x769_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_40k_cityscapes/pspnet_r50-d8_769x769_40k_cityscapes_20200606_112725-86638686.pth +- Name: pspnet_r101-d8_769x769_40k_cityscapes + In Collection: PSPNet + Metadata: + backbone: R-101-D8 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - value: 869.57 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 10.9 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.08 + mIoU(ms+flip): 80.28 + Config: configs/pspnet/pspnet_r101-d8_769x769_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_40k_cityscapes/pspnet_r101-d8_769x769_40k_cityscapes_20200606_112753-61c6f5be.pth +- 
Name: pspnet_r18-d8_512x1024_80k_cityscapes + In Collection: PSPNet + Metadata: + backbone: R-18-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 63.65 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 1.7 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.87 + mIoU(ms+flip): 76.04 + Config: configs/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes/pspnet_r18-d8_512x1024_80k_cityscapes_20201225_021458-09ffa746.pth +- Name: pspnet_r50-d8_512x1024_80k_cityscapes + In Collection: PSPNet + Metadata: + backbone: R-50-D8 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.55 + mIoU(ms+flip): 79.79 + Config: configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes/pspnet_r50-d8_512x1024_80k_cityscapes_20200606_112131-2376f12b.pth +- Name: pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes + In Collection: PSPNet + Metadata: + backbone: R-50b-D8 rsb + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 261.78 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 6.2 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.47 + mIoU(ms+flip): 79.45 + Config: configs/pspnet/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes_20220315_123238-588c30be.pth +- Name: pspnet_r101-d8_512x1024_80k_cityscapes + In Collection: PSPNet + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.76 + mIoU(ms+flip): 81.01 + Config: configs/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes/pspnet_r101-d8_512x1024_80k_cityscapes_20200606_112211-e1e1100f.pth +- Name: pspnet_r101-d8_fp16_512x1024_80k_cityscapes + In Collection: PSPNet + Metadata: + backbone: R-101-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 114.03 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP16 + resolution: (512,1024) + Training Memory (GB): 5.34 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.46 + Config: configs/pspnet/pspnet_r101-d8_fp16_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_fp16_512x1024_80k_cityscapes/pspnet_r101-d8_fp16_512x1024_80k_cityscapes_20200717_230919-a0875e5c.pth +- Name: pspnet_r18-d8_769x769_80k_cityscapes + In Collection: PSPNet + Metadata: + backbone: R-18-D8 + crop size: (769,769) + lr schd: 80000 + inference time (ms/im): + - value: 161.29 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 1.9 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.9 + mIoU(ms+flip): 77.86 + Config: configs/pspnet/pspnet_r18-d8_769x769_80k_cityscapes.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_769x769_80k_cityscapes/pspnet_r18-d8_769x769_80k_cityscapes_20201225_021458-3deefc62.pth +- Name: pspnet_r50-d8_769x769_80k_cityscapes + In Collection: PSPNet + Metadata: + backbone: R-50-D8 + crop size: (769,769) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.59 + mIoU(ms+flip): 80.69 + Config: configs/pspnet/pspnet_r50-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_769x769_80k_cityscapes/pspnet_r50-d8_769x769_80k_cityscapes_20200606_210121-5ccf03dd.pth +- Name: pspnet_r101-d8_769x769_80k_cityscapes + In Collection: PSPNet + Metadata: + backbone: R-101-D8 + crop size: (769,769) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.77 + mIoU(ms+flip): 81.06 + Config: configs/pspnet/pspnet_r101-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_769x769_80k_cityscapes/pspnet_r101-d8_769x769_80k_cityscapes_20200606_225055-dba412fa.pth +- Name: pspnet_r18b-d8_512x1024_80k_cityscapes + In Collection: PSPNet + Metadata: + backbone: R-18b-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 61.43 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 1.5 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.23 + mIoU(ms+flip): 75.79 + Config: configs/pspnet/pspnet_r18b-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_512x1024_80k_cityscapes/pspnet_r18b-d8_512x1024_80k_cityscapes_20201226_063116-26928a60.pth +- Name: pspnet_r50b-d8_512x1024_80k_cityscapes + In Collection: PSPNet + Metadata: + backbone: R-50b-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 232.56 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 6.0 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.22 + mIoU(ms+flip): 79.46 + Config: configs/pspnet/pspnet_r50b-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_512x1024_80k_cityscapes/pspnet_r50b-d8_512x1024_80k_cityscapes_20201225_094315-6344287a.pth +- Name: pspnet_r101b-d8_512x1024_80k_cityscapes + In Collection: PSPNet + Metadata: + backbone: R-101b-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 362.32 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 9.5 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.69 + mIoU(ms+flip): 80.79 + Config: configs/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes/pspnet_r101b-d8_512x1024_80k_cityscapes_20201226_170012-3a4d38ab.pth +- Name: pspnet_r18b-d8_769x769_80k_cityscapes + In Collection: PSPNet + Metadata: + backbone: R-18b-D8 + crop size: (769,769) + lr schd: 80000 + inference time (ms/im): + - value: 156.01 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 1.7 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.92 + 
mIoU(ms+flip): 76.9 + Config: configs/pspnet/pspnet_r18b-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18b-d8_769x769_80k_cityscapes/pspnet_r18b-d8_769x769_80k_cityscapes_20201226_080942-bf98d186.pth +- Name: pspnet_r50b-d8_769x769_80k_cityscapes + In Collection: PSPNet + Metadata: + backbone: R-50b-D8 + crop size: (769,769) + lr schd: 80000 + inference time (ms/im): + - value: 531.91 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 6.8 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.5 + mIoU(ms+flip): 79.96 + Config: configs/pspnet/pspnet_r50b-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d8_769x769_80k_cityscapes/pspnet_r50b-d8_769x769_80k_cityscapes_20201225_094316-4c643cf6.pth +- Name: pspnet_r101b-d8_769x769_80k_cityscapes + In Collection: PSPNet + Metadata: + backbone: R-101b-D8 + crop size: (769,769) + lr schd: 80000 + inference time (ms/im): + - value: 854.7 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 10.8 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.87 + mIoU(ms+flip): 80.04 + Config: configs/pspnet/pspnet_r101b-d8_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101b-d8_769x769_80k_cityscapes/pspnet_r101b-d8_769x769_80k_cityscapes_20201226_171823-f0e7c293.pth +- Name: pspnet_r50-d32_512x1024_80k_cityscapes + In Collection: PSPNet + Metadata: + backbone: R-50-D32 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 65.75 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 3.0 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 73.88 + mIoU(ms+flip): 76.85 + Config: configs/pspnet/pspnet_r50-d32_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d32_512x1024_80k_cityscapes/pspnet_r50-d32_512x1024_80k_cityscapes_20220316_224840-9092b254.pth +- Name: pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes + In Collection: PSPNet + Metadata: + backbone: R-50b-D32 rsb + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 62.19 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 3.1 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.09 + mIoU(ms+flip): 77.18 + Config: configs/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes_20220316_141229-dd9c9610.pth +- Name: pspnet_r50b-d32_512x1024_80k_cityscapes + In Collection: PSPNet + Metadata: + backbone: R-50b-D32 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 64.89 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 2.9 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 72.61 + mIoU(ms+flip): 75.51 + Config: configs/pspnet/pspnet_r50b-d32_512x1024_80k_cityscapes.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50b-d32_512x1024_80k_cityscapes/pspnet_r50b-d32_512x1024_80k_cityscapes_20220311_152152-23bcaf8c.pth +- Name: pspnet_r50-d8_512x512_80k_ade20k + In Collection: PSPNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 42.5 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 8.5 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 41.13 + mIoU(ms+flip): 41.94 + Config: configs/pspnet/pspnet_r50-d8_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_ade20k/pspnet_r50-d8_512x512_80k_ade20k_20200615_014128-15a8b914.pth +- Name: pspnet_r101-d8_512x512_80k_ade20k + In Collection: PSPNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 65.36 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 12.0 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.57 + mIoU(ms+flip): 44.35 + Config: configs/pspnet/pspnet_r101-d8_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_ade20k/pspnet_r101-d8_512x512_80k_ade20k_20200614_031423-b6e782f0.pth +- Name: pspnet_r50-d8_512x512_160k_ade20k + In Collection: PSPNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.48 + mIoU(ms+flip): 43.44 + Config: configs/pspnet/pspnet_r50-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_160k_ade20k/pspnet_r50-d8_512x512_160k_ade20k_20200615_184358-1890b0bd.pth +- Name: pspnet_r101-d8_512x512_160k_ade20k + In Collection: PSPNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 44.39 + mIoU(ms+flip): 45.35 + Config: configs/pspnet/pspnet_r101-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_160k_ade20k/pspnet_r101-d8_512x512_160k_ade20k_20200615_100650-967c316f.pth +- Name: pspnet_r50-d8_512x512_20k_voc12aug + In Collection: PSPNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 42.39 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 6.1 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 76.78 + mIoU(ms+flip): 77.61 + Config: configs/pspnet/pspnet_r50-d8_512x512_20k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_20k_voc12aug/pspnet_r50-d8_512x512_20k_voc12aug_20200617_101958-ed5dfbd9.pth +- Name: pspnet_r101-d8_512x512_20k_voc12aug + In Collection: PSPNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 66.58 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.6 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 78.47 + mIoU(ms+flip): 79.25 + Config: configs/pspnet/pspnet_r101-d8_512x512_20k_voc12aug.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_20k_voc12aug/pspnet_r101-d8_512x512_20k_voc12aug_20200617_102003-4aef3c9a.pth +- Name: pspnet_r50-d8_512x512_40k_voc12aug + In Collection: PSPNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.29 + mIoU(ms+flip): 78.48 + Config: configs/pspnet/pspnet_r50-d8_512x512_40k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_40k_voc12aug/pspnet_r50-d8_512x512_40k_voc12aug_20200613_161222-ae9c1b8c.pth +- Name: pspnet_r101-d8_512x512_40k_voc12aug + In Collection: PSPNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 78.52 + mIoU(ms+flip): 79.57 + Config: configs/pspnet/pspnet_r101-d8_512x512_40k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_40k_voc12aug/pspnet_r101-d8_512x512_40k_voc12aug_20200613_161222-bc933b18.pth +- Name: pspnet_r101-d8_480x480_40k_pascal_context + In Collection: PSPNet + Metadata: + backbone: R-101-D8 + crop size: (480,480) + lr schd: 40000 + inference time (ms/im): + - value: 103.31 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (480,480) + Training Memory (GB): 8.8 + Results: + - Task: Semantic Segmentation + Dataset: Pascal Context + Metrics: + mIoU: 46.6 + mIoU(ms+flip): 47.78 + Config: configs/pspnet/pspnet_r101-d8_480x480_40k_pascal_context.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context/pspnet_r101-d8_480x480_40k_pascal_context_20200911_211210-bf0f5d7c.pth +- Name: pspnet_r101-d8_480x480_80k_pascal_context + In Collection: PSPNet + Metadata: + backbone: R-101-D8 + crop size: (480,480) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal Context + Metrics: + mIoU: 46.03 + mIoU(ms+flip): 47.15 + Config: configs/pspnet/pspnet_r101-d8_480x480_80k_pascal_context.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context/pspnet_r101-d8_480x480_80k_pascal_context_20200911_190530-c86d6233.pth +- Name: pspnet_r101-d8_480x480_40k_pascal_context_59 + In Collection: PSPNet + Metadata: + backbone: R-101-D8 + crop size: (480,480) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal Context 59 + Metrics: + mIoU: 52.02 + mIoU(ms+flip): 53.54 + Config: configs/pspnet/pspnet_r101-d8_480x480_40k_pascal_context_59.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_40k_pascal_context_59/pspnet_r101-d8_480x480_40k_pascal_context_59_20210416_114524-86d44cd4.pth +- Name: pspnet_r101-d8_480x480_80k_pascal_context_59 + In Collection: PSPNet + Metadata: + backbone: R-101-D8 + crop size: (480,480) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal Context 59 + Metrics: + mIoU: 52.47 + mIoU(ms+flip): 53.99 + Config: configs/pspnet/pspnet_r101-d8_480x480_80k_pascal_context_59.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_480x480_80k_pascal_context_59/pspnet_r101-d8_480x480_80k_pascal_context_59_20210416_114418-fa6caaa2.pth +- Name: pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k + In Collection: PSPNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 
20000 + inference time (ms/im): + - value: 48.78 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.6 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 10k + Metrics: + mIoU: 35.69 + mIoU(ms+flip): 36.62 + Config: configs/pspnet/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k_20210820_203258-b88df27f.pth +- Name: pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k + In Collection: PSPNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 90.09 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 13.2 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 10k + Metrics: + mIoU: 37.26 + mIoU(ms+flip): 38.52 + Config: configs/pspnet/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k_20210820_232135-76aae482.pth +- Name: pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k + In Collection: PSPNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 10k + Metrics: + mIoU: 36.33 + mIoU(ms+flip): 37.24 + Config: configs/pspnet/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k_20210821_030857-92e2902b.pth +- Name: pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k + In Collection: PSPNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 10k + Metrics: + mIoU: 37.76 + mIoU(ms+flip): 38.86 + Config: configs/pspnet/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k_20210821_014022-831aec95.pth +- Name: pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k + In Collection: PSPNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 48.78 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.6 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 38.8 + mIoU(ms+flip): 39.19 + Config: configs/pspnet/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k_20210707_152034-0e41b2db.pth +- Name: pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k + In Collection: PSPNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 90.09 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 13.2 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 40.34 + mIoU(ms+flip): 40.79 + Config: configs/pspnet/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k_20210707_152034-7eb41789.pth +- Name: pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k + In Collection: PSPNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 39.64 + mIoU(ms+flip): 39.97 + Config: configs/pspnet/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k_20210707_152004-51276a57.pth +- Name: pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k + In Collection: PSPNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 41.28 + mIoU(ms+flip): 41.66 + Config: configs/pspnet/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k_20210707_152004-4af9621b.pth +- Name: pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k + In Collection: PSPNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 320000 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 40.53 + mIoU(ms+flip): 40.75 + Config: configs/pspnet/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k_20210707_152004-be9610cc.pth +- Name: pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k + In Collection: PSPNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 320000 + Results: + - Task: Semantic Segmentation + Dataset: COCO-Stuff 164k + Metrics: + mIoU: 41.95 + mIoU(ms+flip): 42.42 + Config: configs/pspnet/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k_20210707_152004-72220c60.pth +- Name: pspnet_r18-d8_512x512_80k_loveda + In Collection: PSPNet + Metadata: + backbone: R-18-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 37.22 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 1.45 + Results: + - Task: Semantic Segmentation + Dataset: LoveDA + Metrics: + mIoU: 48.62 + mIoU(ms+flip): 47.57 + Config: configs/pspnet/pspnet_r18-d8_512x512_80k_loveda.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_512x512_80k_loveda/pspnet_r18-d8_512x512_80k_loveda_20211105_052100-b97697f1.pth +- Name: pspnet_r50-d8_512x512_80k_loveda + In Collection: PSPNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 151.52 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 6.14 + Results: + - Task: Semantic Segmentation + Dataset: LoveDA + Metrics: + mIoU: 50.46 + mIoU(ms+flip): 50.19 + Config: configs/pspnet/pspnet_r50-d8_512x512_80k_loveda.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x512_80k_loveda/pspnet_r50-d8_512x512_80k_loveda_20211104_155728-88610f9f.pth +- Name: pspnet_r101-d8_512x512_80k_loveda + In Collection: PSPNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 218.34 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.61 + Results: + - Task: Semantic Segmentation + Dataset: LoveDA + Metrics: + mIoU: 51.86 + mIoU(ms+flip): 51.34 + Config: configs/pspnet/pspnet_r101-d8_512x512_80k_loveda.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_512x512_80k_loveda/pspnet_r101-d8_512x512_80k_loveda_20211104_153212-1c06c6a8.pth +- Name: pspnet_r18-d8_4x4_512x512_80k_potsdam + In Collection: PSPNet + Metadata: + backbone: R-18-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 11.75 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 1.5 + Results: + - Task: Semantic Segmentation + Dataset: Potsdam + Metrics: + mIoU: 77.09 + mIoU(ms+flip): 78.3 + Config: configs/pspnet/pspnet_r18-d8_4x4_512x512_80k_potsdam.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_potsdam/pspnet_r18-d8_4x4_512x512_80k_potsdam_20211220_125612-7cd046e1.pth +- Name: pspnet_r50-d8_4x4_512x512_80k_potsdam + In Collection: PSPNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 33.1 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 6.14 + Results: + - Task: Semantic Segmentation + Dataset: Potsdam + Metrics: + mIoU: 78.12 + mIoU(ms+flip): 78.98 + Config: configs/pspnet/pspnet_r50-d8_4x4_512x512_80k_potsdam.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_potsdam/pspnet_r50-d8_4x4_512x512_80k_potsdam_20211219_043541-2dd5fe67.pth +- Name: pspnet_r101-d8_4x4_512x512_80k_potsdam + In Collection: PSPNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 51.55 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.61 + Results: + - Task: Semantic Segmentation + Dataset: Potsdam + Metrics: + mIoU: 78.62 + mIoU(ms+flip): 79.47 + Config: configs/pspnet/pspnet_r101-d8_4x4_512x512_80k_potsdam.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_potsdam/pspnet_r101-d8_4x4_512x512_80k_potsdam_20211220_125612-aed036c4.pth +- Name: pspnet_r18-d8_4x4_512x512_80k_vaihingen + In Collection: PSPNet + Metadata: + backbone: R-18-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 11.76 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 1.45 + Results: + - Task: Semantic Segmentation + Dataset: Vaihingen + Metrics: + mIoU: 71.46 + mIoU(ms+flip): 73.36 + Config: configs/pspnet/pspnet_r18-d8_4x4_512x512_80k_vaihingen.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_512x512_80k_vaihingen/pspnet_r18-d8_4x4_512x512_80k_vaihingen_20211228_160355-52a8a6f6.pth +- Name: pspnet_r50-d8_4x4_512x512_80k_vaihingen + In Collection: PSPNet + Metadata: + backbone: R-50-D8 + crop size: (512,512) + lr 
schd: 80000 + inference time (ms/im): + - value: 33.01 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 6.14 + Results: + - Task: Semantic Segmentation + Dataset: Vaihingen + Metrics: + mIoU: 72.36 + mIoU(ms+flip): 73.75 + Config: configs/pspnet/pspnet_r50-d8_4x4_512x512_80k_vaihingen.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_512x512_80k_vaihingen/pspnet_r50-d8_4x4_512x512_80k_vaihingen_20211228_160355-382f8f5b.pth +- Name: pspnet_r101-d8_4x4_512x512_80k_vaihingen + In Collection: PSPNet + Metadata: + backbone: R-101-D8 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 50.08 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.61 + Results: + - Task: Semantic Segmentation + Dataset: Vaihingen + Metrics: + mIoU: 72.61 + mIoU(ms+flip): 74.18 + Config: configs/pspnet/pspnet_r101-d8_4x4_512x512_80k_vaihingen.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r101-d8_4x4_512x512_80k_vaihingen/pspnet_r101-d8_4x4_512x512_80k_vaihingen_20211231_230806-8eba0a09.pth +- Name: pspnet_r18-d8_4x4_896x896_80k_isaid + In Collection: PSPNet + Metadata: + backbone: R-18-D8 + crop size: (896,896) + lr schd: 80000 + inference time (ms/im): + - value: 37.16 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (896,896) + Training Memory (GB): 4.52 + Results: + - Task: Semantic Segmentation + Dataset: iSAID + Metrics: + mIoU: 60.22 + mIoU(ms+flip): 61.25 + Config: configs/pspnet/pspnet_r18-d8_4x4_896x896_80k_isaid.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r18-d8_4x4_896x896_80k_isaid/pspnet_r18-d8_4x4_896x896_80k_isaid_20220110_180526-e84c0b6a.pth +- Name: pspnet_r50-d8_4x4_896x896_80k_isaid + In Collection: PSPNet + Metadata: + backbone: R-50-D8 + crop size: (896,896) + lr schd: 80000 + inference time (ms/im): + - value: 112.61 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (896,896) + Training Memory (GB): 16.58 + Results: + - Task: Semantic Segmentation + Dataset: iSAID + Metrics: + mIoU: 65.36 + mIoU(ms+flip): 66.48 + Config: configs/pspnet/pspnet_r50-d8_4x4_896x896_80k_isaid.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_4x4_896x896_80k_isaid/pspnet_r50-d8_4x4_896x896_80k_isaid_20220110_180629-1f21dc32.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_480x480_40k_pascal_context.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_480x480_40k_pascal_context.py new file mode 100644 index 0000000..0b5a990 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_480x480_40k_pascal_context.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_480x480_40k_pascal_context.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_480x480_40k_pascal_context_59.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_480x480_40k_pascal_context_59.py new file mode 100644 index 0000000..081cb37 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_480x480_40k_pascal_context_59.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_480x480_40k_pascal_context_59.py' +model = 
dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_480x480_80k_pascal_context.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000..fda9110 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_480x480_80k_pascal_context.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_480x480_80k_pascal_context.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_480x480_80k_pascal_context_59.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_480x480_80k_pascal_context_59.py new file mode 100644 index 0000000..795c51f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_480x480_80k_pascal_context_59.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_480x480_80k_pascal_context_59.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_4x4_512x512_80k_potsdam.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_4x4_512x512_80k_potsdam.py new file mode 100644 index 0000000..98343dd --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_4x4_512x512_80k_potsdam.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_4x4_512x512_80k_potsdam.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_4x4_512x512_80k_vaihingen.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_4x4_512x512_80k_vaihingen.py new file mode 100644 index 0000000..fd79492 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_4x4_512x512_80k_vaihingen.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_4x4_512x512_80k_vaihingen.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..38fee11 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x1024_40k_dark.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x1024_40k_dark.py new file mode 100644 index 0000000..1057639 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x1024_40k_dark.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_512x1024_40k_dark.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x1024_40k_night_driving.py 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x1024_40k_night_driving.py new file mode 100644 index 0000000..0ecb930 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x1024_40k_night_driving.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_512x1024_40k_night_driving.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..9931a07 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..6107b41 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000..2221b20 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000..15f578b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k.py new file mode 100644 index 0000000..7ae2061 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_4x4_160k_coco-stuff164k.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k.py new file mode 100644 index 0000000..a448496 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_4x4_20k_coco-stuff10k.py @@ 
-0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k.py new file mode 100644 index 0000000..90512b8 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_4x4_320k_coco-stuff164k.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k.py new file mode 100644 index 0000000..36aa443 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_4x4_40k_coco-stuff10k.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k.py new file mode 100644 index 0000000..fdddec4 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_4x4_80k_coco-stuff164k.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..fb7c3d5 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_80k_loveda.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_80k_loveda.py new file mode 100644 index 0000000..03c0251 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_512x512_80k_loveda.py @@ -0,0 +1,6 @@ +_base_ = './pspnet_r50-d8_512x512_80k_loveda.py' +model = dict( + backbone=dict( + depth=101, + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet101_v1c'))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000..c6e7e58 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git 
a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..59b8c6d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_fp16_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_fp16_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..c71b7f6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101-d8_fp16_512x1024_80k_cityscapes.py @@ -0,0 +1,5 @@ +_base_ = './pspnet_r101-d8_512x1024_80k_cityscapes.py' +# fp16 settings +optimizer_config = dict(type='Fp16OptimizerHook', loss_scale=512.) +# fp16 placeholder +fp16 = dict() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..ab8a3d3 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = './pspnet_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101b-d8_512x1024_80k_dark.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101b-d8_512x1024_80k_dark.py new file mode 100644 index 0000000..49231d8 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101b-d8_512x1024_80k_dark.py @@ -0,0 +1,4 @@ +_base_ = './pspnet_r50-d8_512x1024_80k_dark.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101b-d8_512x1024_80k_night_driving.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101b-d8_512x1024_80k_night_driving.py new file mode 100644 index 0000000..c3ed2f1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101b-d8_512x1024_80k_night_driving.py @@ -0,0 +1,4 @@ +_base_ = './pspnet_r50-d8_512x1024_80k_night_driving.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101b-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..1a7cb70 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r101b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = './pspnet_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet101', + backbone=dict(type='ResNet', depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r18-d8_4x4_512x512_80k_potsdam.py 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r18-d8_4x4_512x512_80k_potsdam.py new file mode 100644 index 0000000..be9dc72 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r18-d8_4x4_512x512_80k_potsdam.py @@ -0,0 +1,9 @@ +_base_ = './pspnet_r50-d8_4x4_512x512_80k_potsdam.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r18-d8_4x4_512x512_80k_vaihingen.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r18-d8_4x4_512x512_80k_vaihingen.py new file mode 100644 index 0000000..2cb6922 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r18-d8_4x4_512x512_80k_vaihingen.py @@ -0,0 +1,9 @@ +_base_ = './pspnet_r50-d8_4x4_512x512_80k_vaihingen.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r18-d8_4x4_896x896_80k_isaid.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r18-d8_4x4_896x896_80k_isaid.py new file mode 100644 index 0000000..4f6f9ab --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r18-d8_4x4_896x896_80k_isaid.py @@ -0,0 +1,9 @@ +_base_ = './pspnet_r50-d8_4x4_896x896_80k_isaid.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..d914f93 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r18-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './pspnet_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r18-d8_512x512_80k_loveda.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r18-d8_512x512_80k_loveda.py new file mode 100644 index 0000000..dbb832b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r18-d8_512x512_80k_loveda.py @@ -0,0 +1,11 @@ +_base_ = './pspnet_r50-d8_512x512_80k_loveda.py' +model = dict( + backbone=dict( + depth=18, + init_cfg=dict( + type='Pretrained', checkpoint='open-mmlab://resnet18_v1c')), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r18-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r18-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..5893e66 --- /dev/null +++ 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r18-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './pspnet_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r18b-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r18b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..abeeedf --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r18b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './pspnet_r50-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r18b-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r18b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..284be6d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r18b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = './pspnet_r50-d8_769x769_80k_cityscapes.py' +model = dict( + pretrained='torchvision://resnet18', + backbone=dict(type='ResNet', depth=18), + decode_head=dict( + in_channels=512, + channels=128, + ), + auxiliary_head=dict(in_channels=256, channels=64)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d32_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d32_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..6bfeef3 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d32_512x1024_80k_cityscapes.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict(backbone=dict(dilations=(1, 1, 2, 4), strides=(1, 2, 2, 2))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes.py new file mode 100644 index 0000000..0283876 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d32_rsb-pretrain_512x1024_adamw_80k_cityscapes.py @@ -0,0 +1,25 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +checkpoint = 'https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb256-rsb-a1-600e_in1k_20211228-20e21305.pth' # noqa +model = dict( + pretrained=None, + backbone=dict( + type='ResNet', + init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=checkpoint), + dilations=(1, 1, 2, 4), + strides=(1, 2, 2, 2))) + +optimizer = dict(_delete_=True, type='AdamW', lr=0.0005, weight_decay=0.05) +optimizer_config = dict(grad_clip=dict(max_norm=1, norm_type=2)) +# learning policy +lr_config = dict( + _delete_=True, + policy='step', + warmup='linear', + 
warmup_iters=1000, + warmup_ratio=0.001, + step=[60000, 72000], + by_epoch=False) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_480x480_40k_pascal_context.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_480x480_40k_pascal_context.py new file mode 100644 index 0000000..30abe46 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_480x480_40k_pascal_context.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=60), + auxiliary_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_480x480_40k_pascal_context_59.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_480x480_40k_pascal_context_59.py new file mode 100644 index 0000000..88041c6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_480x480_40k_pascal_context_59.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=59), + auxiliary_head=dict(num_classes=59), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_480x480_80k_pascal_context.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_480x480_80k_pascal_context.py new file mode 100644 index 0000000..09e96da --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_480x480_80k_pascal_context.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/pascal_context.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=60), + auxiliary_head=dict(num_classes=60), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_480x480_80k_pascal_context_59.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_480x480_80k_pascal_context_59.py new file mode 100644 index 0000000..d4065ec --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_480x480_80k_pascal_context_59.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/pascal_context_59.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=59), + auxiliary_head=dict(num_classes=59), + test_cfg=dict(mode='slide', crop_size=(480, 480), stride=(320, 320))) +optimizer = dict(type='SGD', lr=0.004, momentum=0.9, weight_decay=0.0001) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_4x4_512x512_80k_potsdam.py 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_4x4_512x512_80k_potsdam.py new file mode 100644 index 0000000..f78faff --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_4x4_512x512_80k_potsdam.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/potsdam.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=6), auxiliary_head=dict(num_classes=6)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_4x4_512x512_80k_vaihingen.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_4x4_512x512_80k_vaihingen.py new file mode 100644 index 0000000..dfdd294 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_4x4_512x512_80k_vaihingen.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/vaihingen.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=6), auxiliary_head=dict(num_classes=6)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_4x4_896x896_80k_isaid.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_4x4_896x896_80k_isaid.py new file mode 100644 index 0000000..ef7eb99 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_4x4_896x896_80k_isaid.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/isaid.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=16), auxiliary_head=dict(num_classes=16)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..5deb587 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x1024_40k_dark.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x1024_40k_dark.py new file mode 100644 index 0000000..9abb511 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x1024_40k_dark.py @@ -0,0 +1,29 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1920, 1080), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + +data = dict( + test=dict( + type='DarkZurichDataset', + data_root='data/dark_zurich/', + 
img_dir='rgb_anon/val/night/GOPR0356', + ann_dir='gt/val/night/GOPR0356', + pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x1024_40k_night_driving.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x1024_40k_night_driving.py new file mode 100644 index 0000000..195aeea --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x1024_40k_night_driving.py @@ -0,0 +1,29 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] + +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1920, 1080), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + test=dict( + type='NightDrivingDataset', + data_root='data/NighttimeDrivingTest/', + img_dir='leftImg8bit/test/night', + ann_dir='gtCoarse_daytime_trainvaltest/test/night', + pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..4e99728 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x1024_80k_dark.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x1024_80k_dark.py new file mode 100644 index 0000000..2f16171 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x1024_80k_dark.py @@ -0,0 +1,30 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] + +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1920, 1080), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + +data = dict( + test=dict( + type='DarkZurichDataset', + data_root='data/dark_zurich/', + img_dir='rgb_anon/val/night/GOPR0356', + ann_dir='gt/val/night/GOPR0356', + pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x1024_80k_night_driving.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x1024_80k_night_driving.py new file mode 100644 index 0000000..ecc5d99 --- /dev/null +++ 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x1024_80k_night_driving.py @@ -0,0 +1,29 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] + +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(1920, 1080), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + test=dict( + type='NightDrivingDataset', + data_root='data/NighttimeDrivingTest/', + img_dir='leftImg8bit/test/night', + ann_dir='gtCoarse_daytime_trainvaltest/test/night', + pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..8658457 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_20k_voc12aug.py new file mode 100644 index 0000000..cd88154 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_20k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_40k_voc12aug.py new file mode 100644 index 0000000..f0c20c1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_40k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k.py new file mode 100644 index 0000000..e1f8887 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_4x4_160k_coco-stuff164k.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +model = dict( 
+ decode_head=dict(num_classes=171), auxiliary_head=dict(num_classes=171)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k.py new file mode 100644 index 0000000..6cd94f9 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_4x4_20k_coco-stuff10k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/coco-stuff10k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=171), auxiliary_head=dict(num_classes=171)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k.py new file mode 100644 index 0000000..32b3281 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_4x4_320k_coco-stuff164k.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_320k.py' +] +model = dict( + decode_head=dict(num_classes=171), auxiliary_head=dict(num_classes=171)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k.py new file mode 100644 index 0000000..c792bb4 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_4x4_40k_coco-stuff10k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/coco-stuff10k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=171), auxiliary_head=dict(num_classes=171)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k.py new file mode 100644 index 0000000..7f7bc64 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_4x4_80k_coco-stuff164k.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/coco-stuff164k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=171), auxiliary_head=dict(num_classes=171)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_80k_ade20k.py new file mode 100644 index 0000000..52efdf5 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_80k_loveda.py 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_80k_loveda.py new file mode 100644 index 0000000..830af48 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_512x512_80k_loveda.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/loveda.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=7), auxiliary_head=dict(num_classes=7)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_769x769_40k_cityscapes.py new file mode 100644 index 0000000..145cadb --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..23a81eb --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes.py new file mode 100644 index 0000000..a8a80bf --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50-d8_rsb-pretrain_512x1024_adamw_80k_cityscapes.py @@ -0,0 +1,23 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +checkpoint = 'https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb256-rsb-a1-600e_in1k_20211228-20e21305.pth' # noqa +model = dict( + pretrained=None, + backbone=dict( + type='ResNet', + init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=checkpoint))) + +optimizer = dict(_delete_=True, type='AdamW', lr=0.0005, weight_decay=0.05) +optimizer_config = dict(grad_clip=dict(max_norm=1, norm_type=2)) +# learning policy +lr_config = dict( + _delete_=True, + policy='step', + warmup='linear', + warmup_iters=1000, + warmup_ratio=0.001, + step=[60000, 72000], + by_epoch=False) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50b-d32_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50b-d32_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..7f4f6c9 --- /dev/null +++ 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50b-d32_512x1024_80k_cityscapes.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/pspnet_r50-d8.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + pretrained='torchvision://resnet50', + backbone=dict(type='ResNet', dilations=(1, 1, 2, 4), strides=(1, 2, 2, 2))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50b-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50b-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..946bf4f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50b-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_512x1024_80k_cityscapes.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50b-d8_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50b-d8_769x769_80k_cityscapes.py new file mode 100644 index 0000000..b6087dc --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/pspnet/pspnet_r50b-d8_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './pspnet_r50-d8_769x769_80k_cityscapes.py' +model = dict(pretrained='torchvision://resnet50', backbone=dict(type='ResNet')) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/resnest/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/resnest/README.md new file mode 100644 index 0000000..1b116dc --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/resnest/README.md @@ -0,0 +1,54 @@ +# ResNeSt + +[ResNeSt: Split-Attention Networks](https://arxiv.org/abs/2004.08955) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +It is well known that featuremap attention and multi-path representation are important for visual recognition. In this paper, we present a modularized architecture, which applies the channel-wise attention on different network branches to leverage their success in capturing cross-feature interactions and learning diverse representations. Our design results in a simple and unified computation block, which can be parameterized using only a few variables. Our model, named ResNeSt, outperforms EfficientNet in accuracy and latency trade-off on image classification. In addition, ResNeSt has achieved superior transfer learning results on several public benchmarks serving as the backbone, and has been adopted by the winning entries of COCO-LVIS challenge. The source code for complete system and pretrained models are publicly available. + + + +
+ +## Citation + +```bibtex +@article{zhang2020resnest, +title={ResNeSt: Split-Attention Networks}, +author={Zhang, Hang and Wu, Chongruo and Zhang, Zhongyue and Zhu, Yi and Zhang, Zhi and Lin, Haibin and Sun, Yue and He, Tong and Muller, Jonas and Manmatha, R. and Li, Mu and Smola, Alexander}, +journal={arXiv preprint arXiv:2004.08955}, +year={2020} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ----------------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | S-101-D8 | 512x1024 | 80000 | 11.4 | 2.39 | 77.56 | 78.98 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/resnest/fcn_s101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x1024_80k_cityscapes/fcn_s101-d8_512x1024_80k_cityscapes_20200807_140631-f8d155b3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x1024_80k_cityscapes/fcn_s101-d8_512x1024_80k_cityscapes-20200807_140631.log.json) | +| PSPNet | S-101-D8 | 512x1024 | 80000 | 11.8 | 2.52 | 78.57 | 79.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/resnest/pspnet_s101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x1024_80k_cityscapes/pspnet_s101-d8_512x1024_80k_cityscapes_20200807_140631-c75f3b99.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x1024_80k_cityscapes/pspnet_s101-d8_512x1024_80k_cityscapes-20200807_140631.log.json) | +| DeepLabV3 | S-101-D8 | 512x1024 | 80000 | 11.9 | 1.88 | 79.67 | 80.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes/deeplabv3_s101-d8_512x1024_80k_cityscapes_20200807_144429-b73c4270.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes/deeplabv3_s101-d8_512x1024_80k_cityscapes-20200807_144429.log.json) | +| DeepLabV3+ | S-101-D8 | 512x1024 | 80000 | 13.2 | 2.36 | 79.62 | 80.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes/deeplabv3plus_s101-d8_512x1024_80k_cityscapes_20200807_144429-1239eb43.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes/deeplabv3plus_s101-d8_512x1024_80k_cityscapes-20200807_144429.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ------: | -------: 
| -------------- | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FCN | S-101-D8 | 512x512 | 160000 | 14.2 | 12.86 | 45.62 | 46.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/resnest/fcn_s101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x512_160k_ade20k/fcn_s101-d8_512x512_160k_ade20k_20200807_145416-d3160329.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x512_160k_ade20k/fcn_s101-d8_512x512_160k_ade20k-20200807_145416.log.json) | +| PSPNet | S-101-D8 | 512x512 | 160000 | 14.2 | 13.02 | 45.44 | 46.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/resnest/pspnet_s101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x512_160k_ade20k/pspnet_s101-d8_512x512_160k_ade20k_20200807_145416-a6daa92a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x512_160k_ade20k/pspnet_s101-d8_512x512_160k_ade20k-20200807_145416.log.json) | +| DeepLabV3 | S-101-D8 | 512x512 | 160000 | 14.6 | 9.28 | 45.71 | 46.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/resnest/deeplabv3_s101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x512_160k_ade20k/deeplabv3_s101-d8_512x512_160k_ade20k_20200807_144503-17ecabe5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x512_160k_ade20k/deeplabv3_s101-d8_512x512_160k_ade20k-20200807_144503.log.json) | +| DeepLabV3+ | S-101-D8 | 512x512 | 160000 | 16.2 | 11.96 | 46.47 | 47.27 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k/deeplabv3plus_s101-d8_512x512_160k_ade20k_20200807_144503-27b26226.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k/deeplabv3plus_s101-d8_512x512_160k_ade20k-20200807_144503.log.json) | diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..f983986 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = '../deeplabv3/deeplabv3_r101-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/resnest/deeplabv3_s101-d8_512x512_160k_ade20k.py 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/resnest/deeplabv3_s101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..e3924ad --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/resnest/deeplabv3_s101-d8_512x512_160k_ade20k.py @@ -0,0 +1,9 @@ +_base_ = '../deeplabv3/deeplabv3_r101-d8_512x512_160k_ade20k.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..69bef72 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = '../deeplabv3plus/deeplabv3plus_r101-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..d51bccb --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k.py @@ -0,0 +1,9 @@ +_base_ = '../deeplabv3plus/deeplabv3plus_r101-d8_512x512_160k_ade20k.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/resnest/fcn_s101-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/resnest/fcn_s101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..33fa025 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/resnest/fcn_s101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = '../fcn/fcn_r101-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/resnest/fcn_s101-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/resnest/fcn_s101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..dcee8c2 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/resnest/fcn_s101-d8_512x512_160k_ade20k.py @@ -0,0 +1,9 @@ +_base_ = '../fcn/fcn_r101-d8_512x512_160k_ade20k.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/resnest/pspnet_s101-d8_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/resnest/pspnet_s101-d8_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..9737849 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/resnest/pspnet_s101-d8_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = 
'../pspnet/pspnet_r101-d8_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/resnest/pspnet_s101-d8_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/resnest/pspnet_s101-d8_512x512_160k_ade20k.py new file mode 100644 index 0000000..6a622ea --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/resnest/pspnet_s101-d8_512x512_160k_ade20k.py @@ -0,0 +1,9 @@ +_base_ = '../pspnet/pspnet_r101-d8_512x512_160k_ade20k.py' +model = dict( + pretrained='open-mmlab://resnest101', + backbone=dict( + type='ResNeSt', + stem_channels=128, + radix=2, + reduction_factor=4, + avg_down_stride=True)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/resnest/resnest.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/resnest/resnest.yml new file mode 100644 index 0000000..b2ca259 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/resnest/resnest.yml @@ -0,0 +1,177 @@ +Models: +- Name: fcn_s101-d8_512x1024_80k_cityscapes + In Collection: FCN + Metadata: + backbone: S-101-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 418.41 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 11.4 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.56 + mIoU(ms+flip): 78.98 + Config: configs/resnest/fcn_s101-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x1024_80k_cityscapes/fcn_s101-d8_512x1024_80k_cityscapes_20200807_140631-f8d155b3.pth +- Name: pspnet_s101-d8_512x1024_80k_cityscapes + In Collection: PSPNet + Metadata: + backbone: S-101-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 396.83 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 11.8 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.57 + mIoU(ms+flip): 79.19 + Config: configs/resnest/pspnet_s101-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x1024_80k_cityscapes/pspnet_s101-d8_512x1024_80k_cityscapes_20200807_140631-c75f3b99.pth +- Name: deeplabv3_s101-d8_512x1024_80k_cityscapes + In Collection: DeepLabV3 + Metadata: + backbone: S-101-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 531.91 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 11.9 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.67 + mIoU(ms+flip): 80.51 + Config: configs/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x1024_80k_cityscapes/deeplabv3_s101-d8_512x1024_80k_cityscapes_20200807_144429-b73c4270.pth +- Name: deeplabv3plus_s101-d8_512x1024_80k_cityscapes + In Collection: DeepLabV3+ + Metadata: + backbone: S-101-D8 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 423.73 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 13.2 + Results: + - Task: Semantic Segmentation + 
Dataset: Cityscapes + Metrics: + mIoU: 79.62 + mIoU(ms+flip): 80.27 + Config: configs/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x1024_80k_cityscapes/deeplabv3plus_s101-d8_512x1024_80k_cityscapes_20200807_144429-1239eb43.pth +- Name: fcn_s101-d8_512x512_160k_ade20k + In Collection: FCN + Metadata: + backbone: S-101-D8 + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 77.76 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 14.2 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.62 + mIoU(ms+flip): 46.16 + Config: configs/resnest/fcn_s101-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/fcn_s101-d8_512x512_160k_ade20k/fcn_s101-d8_512x512_160k_ade20k_20200807_145416-d3160329.pth +- Name: pspnet_s101-d8_512x512_160k_ade20k + In Collection: PSPNet + Metadata: + backbone: S-101-D8 + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 76.8 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 14.2 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.44 + mIoU(ms+flip): 46.28 + Config: configs/resnest/pspnet_s101-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/pspnet_s101-d8_512x512_160k_ade20k/pspnet_s101-d8_512x512_160k_ade20k_20200807_145416-a6daa92a.pth +- Name: deeplabv3_s101-d8_512x512_160k_ade20k + In Collection: DeepLabV3 + Metadata: + backbone: S-101-D8 + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 107.76 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 14.6 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.71 + mIoU(ms+flip): 46.59 + Config: configs/resnest/deeplabv3_s101-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3_s101-d8_512x512_160k_ade20k/deeplabv3_s101-d8_512x512_160k_ade20k_20200807_144503-17ecabe5.pth +- Name: deeplabv3plus_s101-d8_512x512_160k_ade20k + In Collection: DeepLabV3+ + Metadata: + backbone: S-101-D8 + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 83.61 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 16.2 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 46.47 + mIoU(ms+flip): 47.27 + Config: configs/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/resnest/deeplabv3plus_s101-d8_512x512_160k_ade20k/deeplabv3plus_s101-d8_512x512_160k_ade20k_20200807_144503-27b26226.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/README.md new file mode 100644 index 0000000..3093fcd --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/README.md @@ -0,0 +1,127 @@ +# SegFormer + +[SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers](https://arxiv.org/abs/2105.15203) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +We present SegFormer, a simple, efficient yet powerful semantic 
segmentation framework which unifies Transformers with lightweight multilayer perceptron (MLP) decoders. SegFormer has two appealing features: 1) SegFormer comprises a novel hierarchically structured Transformer encoder which outputs multiscale features. It does not need positional encoding, thereby avoiding the interpolation of positional codes, which leads to decreased performance when the testing resolution differs from the training one. 2) SegFormer avoids complex decoders. The proposed MLP decoder aggregates information from different layers, and thus combines both local and global attention to render powerful representations. We show that this simple and lightweight design is the key to efficient segmentation on Transformers. We scale our approach up to obtain a series of models from SegFormer-B0 to SegFormer-B5, reaching significantly better performance and efficiency than previous counterparts. For example, SegFormer-B4 achieves 50.3% mIoU on ADE20K with 64M parameters, being 5x smaller and 2.2% better than the previous best method. Our best model, SegFormer-B5, achieves 84.0% mIoU on the Cityscapes validation set and shows excellent zero-shot robustness on Cityscapes-C. Code will be released at: [this http URL](https://github.com/NVlabs/SegFormer).
+
+
+
+
+## Citation
+
+```bibtex
+@article{xie2021segformer,
+  title={SegFormer: Simple and Efficient Design for Semantic Segmentation with Transformers},
+  author={Xie, Enze and Wang, Wenhai and Yu, Zhiding and Anandkumar, Anima and Alvarez, Jose M and Luo, Ping},
+  journal={arXiv preprint arXiv:2105.15203},
+  year={2021}
+}
+```
+
+## Usage
+
+We have provided pretrained models converted from [SegFormer](https://github.com/NVlabs/SegFormer).
+
+If you want to convert keys on your own, we also provide a script, [`mit2mmseg.py`](../../tools/model_converters/mit2mmseg.py), in the tools directory to convert the keys of models from [the official repo](https://github.com/NVlabs/SegFormer) to MMSegmentation style.
+
+```shell
+python tools/model_converters/mit2mmseg.py ${PRETRAIN_PATH} ${STORE_PATH}
+```
+
+This script converts a model from `PRETRAIN_PATH` and stores the converted model in `STORE_PATH`. A minimal sketch of the key-remapping pattern it follows is given at the end of this page.
+
+## Results and models
+
+### ADE20K
+
+| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download |
+| --------- | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| Segformer | MIT-B0 | 512x512 | 160000 | 2.1 | 38.17 | 37.85 | 38.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b0_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_512x512_160k_ade20k/segformer_mit-b0_512x512_160k_ade20k_20220617_162207-c00b9603.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_512x512_160k_ade20k/segformer_mit-b0_512x512_160k_ade20k_20220617_162207.log.json) |
+| Segformer | MIT-B1 | 512x512 | 160000 | 2.6 | 37.80 | 42.13 | 43.74 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b1_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_512x512_160k_ade20k/segformer_mit-b1_512x512_160k_ade20k_20220620_112037-c3f39e00.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_512x512_160k_ade20k/segformer_mit-b1_512x512_160k_ade20k_20220620_112037.log.json) |
+| Segformer | MIT-B2 | 512x512 | 160000 | 3.6 | 26.80 | 46.80 | 48.12 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b2_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_512x512_160k_ade20k/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_512x512_160k_ade20k/segformer_mit-b2_512x512_160k_ade20k_20220620_114047.log.json) |
+| Segformer | MIT-B3 | 512x512 | 160000 | 4.8 | 19.19 | 48.25 | 49.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b3_512x512_160k_ade20k.py) |
[model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_512x512_160k_ade20k/segformer_mit-b3_512x512_160k_ade20k_20220617_162254-3a4b7363.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_512x512_160k_ade20k/segformer_mit-b3_512x512_160k_ade20k_20220617_162254.log.json) | +| Segformer | MIT-B4 | 512x512 | 160000 | 6.1 | 14.54 | 49.09 | 50.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b4_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_512x512_160k_ade20k/segformer_mit-b4_512x512_160k_ade20k_20220620_112216-4fa4f58f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_512x512_160k_ade20k/segformer_mit-b4_512x512_160k_ade20k_20220620_112216.log.json) | +| Segformer | MIT-B5 | 512x512 | 160000 | 7.2 | 11.89 | 49.13 | 50.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b5_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_512x512_160k_ade20k/segformer_mit-b5_512x512_160k_ade20k_20210726_145235-94cedf59.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_512x512_160k_ade20k/segformer_mit-b5_512x512_160k_ade20k_20210726_145235.log.json) | +| Segformer | MIT-B5 | 640x640 | 160000 | 11.5 | 10.60 | 50.19 | 51.41 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b5_640x640_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_640x640_160k_ade20k/segformer_mit-b5_640x640_160k_ade20k_20220617_203542-940a6bd8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_640x640_160k_ade20k/segformer_mit-b5_640x640_160k_ade20k_20220617_203542.log.json) | + +Evaluation with `AlignedResize`: + +| Method | Backbone | Crop Size | Lr schd | mIoU | mIoU(ms+flip) | +| --------- | -------- | --------- | ------: | ----: | ------------- | +| Segformer | MIT-B0 | 512x512 | 160000 | 38.55 | 39.03 | +| Segformer | MIT-B1 | 512x512 | 160000 | 43.26 | 44.11 | +| Segformer | MIT-B2 | 512x512 | 160000 | 47.46 | 48.16 | +| Segformer | MIT-B3 | 512x512 | 160000 | 49.27 | 49.94 | +| Segformer | MIT-B4 | 512x512 | 160000 | 50.23 | 51.10 | +| Segformer | MIT-B5 | 512x512 | 160000 | 50.08 | 50.72 | +| Segformer | MIT-B5 | 640x640 | 160000 | 51.13 | 51.66 | + +### Cityscapes + +The lower fps result is caused by the sliding window inference scheme (window size:1024x1024). 
+ +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| --------- | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | -------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Segformer | MIT-B0 | 1024x1024 | 160000 | 3.64 | 4.74 | 76.54 | 78.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes/segformer_mit-b0_8x1_1024x1024_160k_cityscapes_20211208_101857-e7f88502.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes/segformer_mit-b0_8x1_1024x1024_160k_cityscapes_20211208_101857.log.json) | +| Segformer | MIT-B1 | 1024x1024 | 160000 | 4.49 | 4.3 | 78.56 | 79.73 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes/segformer_mit-b1_8x1_1024x1024_160k_cityscapes_20211208_064213-655c7b3f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes/segformer_mit-b1_8x1_1024x1024_160k_cityscapes_20211208_064213.log.json) | +| Segformer | MIT-B2 | 1024x1024 | 160000 | 7.42 | 3.36 | 81.08 | 82.18 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes/segformer_mit-b2_8x1_1024x1024_160k_cityscapes_20211207_134205-6096669a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes/segformer_mit-b2_8x1_1024x1024_160k_cityscapes_20211207_134205.log.json) | +| Segformer | MIT-B3 | 1024x1024 | 160000 | 10.86 | 2.53 | 81.94 | 83.14 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes/segformer_mit-b3_8x1_1024x1024_160k_cityscapes_20211206_224823-a8f8a177.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes/segformer_mit-b3_8x1_1024x1024_160k_cityscapes_20211206_224823.log.json) | +| Segformer | MIT-B4 | 1024x1024 | 160000 | 15.07 | 1.88 | 81.89 | 83.38 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes/segformer_mit-b4_8x1_1024x1024_160k_cityscapes_20211207_080709-07f6c333.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes/segformer_mit-b4_8x1_1024x1024_160k_cityscapes_20211207_080709.log.json) |
+| Segformer | MIT-B5 | 1024x1024 | 160000 | 18.00 | 1.39 | 82.25 | 83.48 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes/segformer_mit-b5_8x1_1024x1024_160k_cityscapes_20211206_072934-87a052ec.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes/segformer_mit-b5_8x1_1024x1024_160k_cityscapes_20211206_072934.log.json) |
+
+Note:
+
+The original SegFormer paper uses a different `test_pipeline` and different image ratios for `ms+flip`. If you want to cite the original SegFormer results as a benchmark, you may modify the settings as below:
+
+- We replace `AlignedResize` in the original implementation with `Resize + ResizeToMultiple`. If you want to test using `AlignedResize`, you can change the dataset pipeline like this:
+
+```python
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(2048, 512),
+        # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            # Resize the image to a multiple of 32; improves SegFormer by 0.5-1.0 mIoU.
+            dict(type='ResizeToMultiple', size_divisor=32),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ])
+]
+```
+
+- Unlike the default `ms+flip` setting, the original SegFormer repo adopts [different image ratios](https://github.com/NVlabs/SegFormer/blob/master/tools/test.py#L97-L101) for the ADE20K dataset. To reproduce the numerical results of `ms+flip`, you can change the image ratios in `tools/test.py` like this:
+
+```python
+if args.aug_test:
+    if cfg.data.test.type == 'ADE20KDataset':
+        # hard-coded index
+        cfg.data.test.pipeline[1].img_ratios = [
+            0.75, 0.875, 1.0, 1.125, 1.25
+        ]
+```
+
+- Training of SegFormer is not very stable; it is sensitive to random seeds.
+
+- We use the default training setting in MMSegmentation rather than the `RepeatDataset` that the official SegFormer repo adopts to accelerate [training](https://github.com/NVlabs/SegFormer/blob/master/local_configs/_base_/datasets/ade20k_repeat.py#L38-L39); see the related [issue](https://github.com/NVlabs/SegFormer/issues/25).
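+
+## Key conversion sketch
+
+As a companion to the Usage section above, here is a minimal, hypothetical sketch of the key-remapping pattern that converter scripts such as `mit2mmseg.py` follow: load the official checkpoint, rename the state-dict keys, and save the result. The single `patch_embed1` rule below is an illustrative assumption, not the real mapping; the actual rules live in [`mit2mmseg.py`](../../tools/model_converters/mit2mmseg.py).
+
+```python
+# Hypothetical sketch only; see tools/model_converters/mit2mmseg.py for the real rules.
+from collections import OrderedDict
+
+import torch
+
+
+def convert_keys(src_path, dst_path):
+    checkpoint = torch.load(src_path, map_location='cpu')
+    # Official checkpoints may wrap the weights in a 'state_dict' entry.
+    state_dict = checkpoint.get('state_dict', checkpoint)
+    new_state_dict = OrderedDict()
+    for key, value in state_dict.items():
+        new_key = key
+        # Example rule (assumed): map the first patch-embedding block to the
+        # module path expected by the MMSegmentation MiT backbone.
+        if key.startswith('patch_embed1.'):
+            new_key = key.replace('patch_embed1.', 'layers.0.0.', 1)
+        new_state_dict[new_key] = value
+    torch.save(new_state_dict, dst_path)
+```
+
+The converted file can then be referenced from a config, for example via `init_cfg=dict(type='Pretrained', checkpoint=...)`, as the RSB-pretrained PSPNet configs in this changeset do.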
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer.yml new file mode 100644 index 0000000..7d5d244 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer.yml @@ -0,0 +1,303 @@ +Collections: +- Name: Segformer + Metadata: + Training Data: + - ADE20K + - Cityscapes + Paper: + URL: https://arxiv.org/abs/2105.15203 + Title: 'SegFormer: Simple and Efficient Design for Semantic Segmentation with + Transformers' + README: configs/segformer/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/mit.py#L246 + Version: v0.17.0 + Converted From: + Code: https://github.com/NVlabs/SegFormer +Models: +- Name: segformer_mit-b0_512x512_160k_ade20k + In Collection: Segformer + Metadata: + backbone: MIT-B0 + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 26.2 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 2.1 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 37.85 + mIoU(ms+flip): 38.97 + Config: configs/segformer/segformer_mit-b0_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_512x512_160k_ade20k/segformer_mit-b0_512x512_160k_ade20k_20220617_162207-c00b9603.pth +- Name: segformer_mit-b1_512x512_160k_ade20k + In Collection: Segformer + Metadata: + backbone: MIT-B1 + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 26.46 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 2.6 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.13 + mIoU(ms+flip): 43.74 + Config: configs/segformer/segformer_mit-b1_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_512x512_160k_ade20k/segformer_mit-b1_512x512_160k_ade20k_20220620_112037-c3f39e00.pth +- Name: segformer_mit-b2_512x512_160k_ade20k + In Collection: Segformer + Metadata: + backbone: MIT-B2 + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 37.31 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 3.6 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 46.8 + mIoU(ms+flip): 48.12 + Config: configs/segformer/segformer_mit-b2_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_512x512_160k_ade20k/segformer_mit-b2_512x512_160k_ade20k_20220620_114047-64e4feca.pth +- Name: segformer_mit-b3_512x512_160k_ade20k + In Collection: Segformer + Metadata: + backbone: MIT-B3 + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 52.11 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 4.8 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 48.25 + mIoU(ms+flip): 49.58 + Config: configs/segformer/segformer_mit-b3_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_512x512_160k_ade20k/segformer_mit-b3_512x512_160k_ade20k_20220617_162254-3a4b7363.pth +- Name: segformer_mit-b4_512x512_160k_ade20k + In Collection: Segformer + Metadata: + backbone: MIT-B4 + crop size: 
(512,512) + lr schd: 160000 + inference time (ms/im): + - value: 68.78 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 6.1 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 49.09 + mIoU(ms+flip): 50.72 + Config: configs/segformer/segformer_mit-b4_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_512x512_160k_ade20k/segformer_mit-b4_512x512_160k_ade20k_20220620_112216-4fa4f58f.pth +- Name: segformer_mit-b5_512x512_160k_ade20k + In Collection: Segformer + Metadata: + backbone: MIT-B5 + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 84.1 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 7.2 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 49.13 + mIoU(ms+flip): 50.22 + Config: configs/segformer/segformer_mit-b5_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_512x512_160k_ade20k/segformer_mit-b5_512x512_160k_ade20k_20210726_145235-94cedf59.pth +- Name: segformer_mit-b5_640x640_160k_ade20k + In Collection: Segformer + Metadata: + backbone: MIT-B5 + crop size: (640,640) + lr schd: 160000 + inference time (ms/im): + - value: 94.34 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (640,640) + Training Memory (GB): 11.5 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 50.19 + mIoU(ms+flip): 51.41 + Config: configs/segformer/segformer_mit-b5_640x640_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_640x640_160k_ade20k/segformer_mit-b5_640x640_160k_ade20k_20220617_203542-940a6bd8.pth +- Name: segformer_mit-b0_8x1_1024x1024_160k_cityscapes + In Collection: Segformer + Metadata: + backbone: MIT-B0 + crop size: (1024,1024) + lr schd: 160000 + inference time (ms/im): + - value: 210.97 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (1024,1024) + Training Memory (GB): 3.64 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.54 + mIoU(ms+flip): 78.22 + Config: configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes/segformer_mit-b0_8x1_1024x1024_160k_cityscapes_20211208_101857-e7f88502.pth +- Name: segformer_mit-b1_8x1_1024x1024_160k_cityscapes + In Collection: Segformer + Metadata: + backbone: MIT-B1 + crop size: (1024,1024) + lr schd: 160000 + inference time (ms/im): + - value: 232.56 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (1024,1024) + Training Memory (GB): 4.49 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.56 + mIoU(ms+flip): 79.73 + Config: configs/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes/segformer_mit-b1_8x1_1024x1024_160k_cityscapes_20211208_064213-655c7b3f.pth +- Name: segformer_mit-b2_8x1_1024x1024_160k_cityscapes + In Collection: Segformer + Metadata: + backbone: MIT-B2 + crop size: (1024,1024) + lr schd: 160000 + inference time (ms/im): + - value: 297.62 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: 
(1024,1024) + Training Memory (GB): 7.42 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 81.08 + mIoU(ms+flip): 82.18 + Config: configs/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes/segformer_mit-b2_8x1_1024x1024_160k_cityscapes_20211207_134205-6096669a.pth +- Name: segformer_mit-b3_8x1_1024x1024_160k_cityscapes + In Collection: Segformer + Metadata: + backbone: MIT-B3 + crop size: (1024,1024) + lr schd: 160000 + inference time (ms/im): + - value: 395.26 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (1024,1024) + Training Memory (GB): 10.86 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 81.94 + mIoU(ms+flip): 83.14 + Config: configs/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes/segformer_mit-b3_8x1_1024x1024_160k_cityscapes_20211206_224823-a8f8a177.pth +- Name: segformer_mit-b4_8x1_1024x1024_160k_cityscapes + In Collection: Segformer + Metadata: + backbone: MIT-B4 + crop size: (1024,1024) + lr schd: 160000 + inference time (ms/im): + - value: 531.91 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (1024,1024) + Training Memory (GB): 15.07 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 81.89 + mIoU(ms+flip): 83.38 + Config: configs/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes/segformer_mit-b4_8x1_1024x1024_160k_cityscapes_20211207_080709-07f6c333.pth +- Name: segformer_mit-b5_8x1_1024x1024_160k_cityscapes + In Collection: Segformer + Metadata: + backbone: MIT-B5 + crop size: (1024,1024) + lr schd: 160000 + inference time (ms/im): + - value: 719.42 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (1024,1024) + Training Memory (GB): 18.0 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 82.25 + mIoU(ms+flip): 83.48 + Config: configs/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes/segformer_mit-b5_8x1_1024x1024_160k_cityscapes_20211206_072934-87a052ec.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b0_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b0_512x512_160k_ade20k.py new file mode 100644 index 0000000..b0af248 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b0_512x512_160k_ade20k.py @@ -0,0 +1,34 @@ +_base_ = [ + '../_base_/models/segformer_mit-b0.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b0_20220624-7e0fe6dd.pth' # noqa + +model = dict(pretrained=checkpoint, decode_head=dict(num_classes=150)) + +# optimizer +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'pos_block': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.), + 
'head': dict(lr_mult=10.) + })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +data = dict(samples_per_gpu=2, workers_per_gpu=2) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py new file mode 100644 index 0000000..7e2cb2d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py @@ -0,0 +1,37 @@ +_base_ = [ + '../_base_/models/segformer_mit-b0.py', + '../_base_/datasets/cityscapes_1024x1024.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b0_20220624-7e0fe6dd.pth' # noqa + +model = dict( + backbone=dict(init_cfg=dict(type='Pretrained', checkpoint=checkpoint)), + test_cfg=dict(mode='slide', crop_size=(1024, 1024), stride=(768, 768))) + +# optimizer +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'pos_block': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.), + 'head': dict(lr_mult=10.) + })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +data = dict(samples_per_gpu=1, workers_per_gpu=1) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b1_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b1_512x512_160k_ade20k.py new file mode 100644 index 0000000..6dee6dd --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b1_512x512_160k_ade20k.py @@ -0,0 +1,10 @@ +_base_ = ['./segformer_mit-b0_512x512_160k_ade20k.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b1_20220624-02e5a6a1.pth' # noqa + +# model settings +model = dict( + pretrained=checkpoint, + backbone=dict( + embed_dims=64, num_heads=[1, 2, 5, 8], num_layers=[2, 2, 2, 2]), + decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes.py new file mode 100644 index 0000000..2a3263f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b1_8x1_1024x1024_160k_cityscapes.py @@ -0,0 +1,8 @@ +_base_ = ['./segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b1_20220624-02e5a6a1.pth' # noqa +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + embed_dims=64), + decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b2_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b2_512x512_160k_ade20k.py new file mode 100644 index 0000000..3c63163 --- /dev/null +++ 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b2_512x512_160k_ade20k.py @@ -0,0 +1,10 @@ +_base_ = ['./segformer_mit-b0_512x512_160k_ade20k.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b2_20220624-66e8bf70.pth' # noqa + +# model settings +model = dict( + pretrained=checkpoint, + backbone=dict( + embed_dims=64, num_heads=[1, 2, 5, 8], num_layers=[3, 4, 6, 3]), + decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes.py new file mode 100644 index 0000000..282cc24 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b2_8x1_1024x1024_160k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = ['./segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b2_20220624-66e8bf70.pth' # noqa +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + embed_dims=64, + num_layers=[3, 4, 6, 3]), + decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b3_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b3_512x512_160k_ade20k.py new file mode 100644 index 0000000..aa4dc4c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b3_512x512_160k_ade20k.py @@ -0,0 +1,10 @@ +_base_ = ['./segformer_mit-b0_512x512_160k_ade20k.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b3_20220624-13b1141c.pth' # noqa + +# model settings +model = dict( + pretrained=checkpoint, + backbone=dict( + embed_dims=64, num_heads=[1, 2, 5, 8], num_layers=[3, 4, 18, 3]), + decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes.py new file mode 100644 index 0000000..67d70c1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b3_8x1_1024x1024_160k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = ['./segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b3_20220624-13b1141c.pth' # noqa +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + embed_dims=64, + num_layers=[3, 4, 18, 3]), + decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b4_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b4_512x512_160k_ade20k.py new file mode 100644 index 0000000..f9a026e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b4_512x512_160k_ade20k.py @@ -0,0 +1,10 @@ +_base_ = ['./segformer_mit-b0_512x512_160k_ade20k.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b4_20220624-d588d980.pth' # noqa + +# model settings +model = dict( 
+ pretrained=checkpoint, + backbone=dict( + embed_dims=64, num_heads=[1, 2, 5, 8], num_layers=[3, 8, 27, 3]), + decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes.py new file mode 100644 index 0000000..332d840 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b4_8x1_1024x1024_160k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = ['./segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b4_20220624-d588d980.pth' # noqa +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + embed_dims=64, + num_layers=[3, 8, 27, 3]), + decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b5_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b5_512x512_160k_ade20k.py new file mode 100644 index 0000000..3175ba5 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b5_512x512_160k_ade20k.py @@ -0,0 +1,10 @@ +_base_ = ['./segformer_mit-b0_512x512_160k_ade20k.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b5_20220624-658746d9.pth' # noqa + +# model settings +model = dict( + pretrained=checkpoint, + backbone=dict( + embed_dims=64, num_heads=[1, 2, 5, 8], num_layers=[3, 6, 40, 3]), + decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b5_640x640_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b5_640x640_160k_ade20k.py new file mode 100644 index 0000000..ca3f683 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b5_640x640_160k_ade20k.py @@ -0,0 +1,45 @@ +_base_ = ['./segformer_mit-b0_512x512_160k_ade20k.py'] + +# dataset settings +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (640, 640) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='Resize', img_scale=(2048, 640), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 640), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +data = dict( + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) + +# model settings +checkpoint =
'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b5_20220624-658746d9.pth' # noqa +model = dict( + pretrained=checkpoint, + backbone=dict( + embed_dims=64, num_heads=[1, 2, 5, 8], num_layers=[3, 6, 40, 3]), + decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes.py new file mode 100644 index 0000000..3015aee --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segformer/segformer_mit-b5_8x1_1024x1024_160k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = ['./segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b5_20220624-658746d9.pth' # noqa +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + embed_dims=64, + num_layers=[3, 6, 40, 3]), + decode_head=dict(in_channels=[64, 128, 320, 512])) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segmenter/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segmenter/README.md new file mode 100644 index 0000000..45041c6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segmenter/README.md @@ -0,0 +1,78 @@ +# Segmenter + +[Segmenter: Transformer for Semantic Segmentation](https://arxiv.org/abs/2105.05633) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Image segmentation is often ambiguous at the level of individual image patches and requires contextual information to reach label consensus. In this paper we introduce Segmenter, a transformer model for semantic segmentation. In contrast to convolution-based methods, our approach allows to model global context already at the first layer and throughout the network. We build on the recent Vision Transformer (ViT) and extend it to semantic segmentation. To do so, we rely on the output embeddings corresponding to image patches and obtain class labels from these embeddings with a point-wise linear decoder or a mask transformer decoder. We leverage models pre-trained for image classification and show that we can fine-tune them on moderate sized datasets available for semantic segmentation. The linear decoder allows to obtain excellent results already, but the performance can be further improved by a mask transformer generating class masks. We conduct an extensive ablation study to show the impact of the different parameters, in particular the performance is better for large models and small patch sizes. Segmenter attains excellent results for semantic segmentation. It outperforms the state of the art on both ADE20K and Pascal Context datasets and is competitive on Cityscapes. + + + +
+ +## Citation
+ +```bibtex +@inproceedings{strudel2021segmenter, + title={Segmenter: Transformer for semantic segmentation}, + author={Strudel, Robin and Garcia, Ricardo and Laptev, Ivan and Schmid, Cordelia}, + booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision}, + pages={7262--7272}, + year={2021} +} +``` + +## Usage + +We have provided pretrained models converted from [ViT-AugReg](https://github.com/rwightman/pytorch-image-models/blob/f55c22bebf9d8afc449d317a723231ef72e0d662/timm/models/vision_transformer.py#L54-L106). + +If you want to convert keys on your own to use the pre-trained ViT model from [Segmenter](https://github.com/rstrudel/segmenter), we also provide a script [`vitjax2mmseg.py`](../../tools/model_converters/vitjax2mmseg.py) in the tools directory to convert the keys of models from [ViT-AugReg](https://github.com/rwightman/pytorch-image-models/blob/f55c22bebf9d8afc449d317a723231ef72e0d662/timm/models/vision_transformer.py#L54-L106) to MMSegmentation style. + +```shell +python tools/model_converters/vitjax2mmseg.py ${PRETRAIN_PATH} ${STORE_PATH} +``` + +E.g. + +```shell +python tools/model_converters/vitjax2mmseg.py \ +Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npz \ +pretrain/vit_tiny_p16_384.pth +``` + +This script converts the model from `PRETRAIN_PATH` and stores the converted model in `STORE_PATH`. A sketch of how to plug a converted checkpoint into one of these configs follows the results table below. + +In our default setting, the pretrained models and their corresponding [ViT-AugReg](https://github.com/rwightman/pytorch-image-models/blob/f55c22bebf9d8afc449d317a723231ef72e0d662/timm/models/vision_transformer.py#L54-L106) originals are listed below: + +| pretrained models | original models | +| --------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| vit_tiny_p16_384.pth | ['vit_tiny_patch16_384'](https://storage.googleapis.com/vit_models/augreg/Ti_16-i21k-300ep-lr_0.001-aug_none-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npz) | +| vit_small_p16_384.pth | ['vit_small_patch16_384'](https://storage.googleapis.com/vit_models/augreg/S_16-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_384.npz) | +| vit_base_p16_384.pth | ['vit_base_patch16_384'](https://storage.googleapis.com/vit_models/augreg/B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_384.npz) | +| vit_large_p16_384.pth | ['vit_large_patch16_384'](https://storage.googleapis.com/vit_models/augreg/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_384.npz) | + +## Results and models + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ---------------- | -------- | --------- | ------- | -------- | -------------- | ----- | ------------- | -------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Segmenter Mask | ViT-T_16 |
512x512 | 160000 | 1.21 | 27.98 | 39.99 | 40.83 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k/segmenter_vit-t_mask_8x1_512x512_160k_ade20k_20220105_151706-ffcf7509.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k/segmenter_vit-t_mask_8x1_512x512_160k_ade20k_20220105_151706.log.json) | +| Segmenter Linear | ViT-S_16 | 512x512 | 160000 | 1.78 | 28.07 | 45.75 | 46.82 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segmenter/segmenter_vit-s_linear_8x1_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-s_linear_8x1_512x512_160k_ade20k/segmenter_vit-s_linear_8x1_512x512_160k_ade20k_20220105_151713-39658c46.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-s_linear_8x1_512x512_160k_ade20k/segmenter_vit-s_linear_8x1_512x512_160k_ade20k_20220105_151713.log.json) | +| Segmenter Mask | ViT-S_16 | 512x512 | 160000 | 2.03 | 24.80 | 46.19 | 47.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k/segmenter_vit-s_mask_8x1_512x512_160k_ade20k_20220105_151706-511bb103.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k/segmenter_vit-s_mask_8x1_512x512_160k_ade20k_20220105_151706.log.json) | +| Segmenter Mask | ViT-B_16 | 512x512 | 160000 | 4.20 | 13.20 | 49.60 | 51.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segmenter/segmenter_vit-b_mask_8x1_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-b_mask_8x1_512x512_160k_ade20k/segmenter_vit-b_mask_8x1_512x512_160k_ade20k_20220105_151706-bc533b08.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-b_mask_8x1_512x512_160k_ade20k/segmenter_vit-b_mask_8x1_512x512_160k_ade20k_20220105_151706.log.json) | +| Segmenter Mask | ViT-L_16 | 640x640 | 160000 | 16.99 | 3.03 | 51.65 | 53.58 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/segmenter/segmenter_vit-l_mask_8x1_640x640_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-l_mask_8x1_640x640_160k_ade20k/segmenter_vit-l_mask_8x1_640x640_160k_ade20k_20220614_024513-4783a347.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-l_mask_8x1_640x640_160k_ade20k/segmenter_vit-l_mask_8x1_640x640_160k_ade20k_20220614_024513.log.json) | + +Note: + +- This model's performance is sensitive to the seed values used; please refer to the log file for the specific seed settings. If you choose a different seed, the results might differ from those in the table.
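+
+To connect this with the Usage section above: a hypothetical local override (not part of this change) could point one of the added configs at a checkpoint converted with `vitjax2mmseg.py`. The override file and the `pretrain/vit_tiny_p16_384.pth` path below are illustrative, following the conversion example earlier.
+
+```python
+# Hypothetical override config: reuse the ViT-T Segmenter recipe, but load
+# the locally converted ViT-AugReg weights instead of the OpenMMLab ones.
+_base_ = ['./segmenter_vit-t_mask_8x1_512x512_160k_ade20k.py']
+
+# Path produced by the vitjax2mmseg.py example in the Usage section.
+model = dict(pretrained='pretrain/vit_tiny_p16_384.pth')
+```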
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segmenter/segmenter.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segmenter/segmenter.yml new file mode 100644 index 0000000..af1df7d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segmenter/segmenter.yml @@ -0,0 +1,125 @@ +Collections: +- Name: Segmenter + Metadata: + Training Data: + - ADE20K + Paper: + URL: https://arxiv.org/abs/2105.05633 + Title: 'Segmenter: Transformer for Semantic Segmentation' + README: configs/segmenter/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.21.0/mmseg/models/decode_heads/segmenter_mask_head.py#L15 + Version: v0.21.0 + Converted From: + Code: https://github.com/rstrudel/segmenter +Models: +- Name: segmenter_vit-t_mask_8x1_512x512_160k_ade20k + In Collection: Segmenter + Metadata: + backbone: ViT-T_16 + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 35.74 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 1.21 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 39.99 + mIoU(ms+flip): 40.83 + Config: configs/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k/segmenter_vit-t_mask_8x1_512x512_160k_ade20k_20220105_151706-ffcf7509.pth +- Name: segmenter_vit-s_linear_8x1_512x512_160k_ade20k + In Collection: Segmenter + Metadata: + backbone: ViT-S_16 + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 35.63 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 1.78 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.75 + mIoU(ms+flip): 46.82 + Config: configs/segmenter/segmenter_vit-s_linear_8x1_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-s_linear_8x1_512x512_160k_ade20k/segmenter_vit-s_linear_8x1_512x512_160k_ade20k_20220105_151713-39658c46.pth +- Name: segmenter_vit-s_mask_8x1_512x512_160k_ade20k + In Collection: Segmenter + Metadata: + backbone: ViT-S_16 + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 40.32 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 2.03 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 46.19 + mIoU(ms+flip): 47.85 + Config: configs/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k/segmenter_vit-s_mask_8x1_512x512_160k_ade20k_20220105_151706-511bb103.pth +- Name: segmenter_vit-b_mask_8x1_512x512_160k_ade20k + In Collection: Segmenter + Metadata: + backbone: ViT-B_16 + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 75.76 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 4.2 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 49.6 + mIoU(ms+flip): 51.07 + Config: configs/segmenter/segmenter_vit-b_mask_8x1_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-b_mask_8x1_512x512_160k_ade20k/segmenter_vit-b_mask_8x1_512x512_160k_ade20k_20220105_151706-bc533b08.pth +- Name: 
segmenter_vit-l_mask_8x1_640x640_160k_ade20k + In Collection: Segmenter + Metadata: + backbone: ViT-L_16 + crop size: (640,640) + lr schd: 160000 + inference time (ms/im): + - value: 330.03 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (640,640) + Training Memory (GB): 16.99 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 51.65 + mIoU(ms+flip): 53.58 + Config: configs/segmenter/segmenter_vit-l_mask_8x1_640x640_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/segmenter/segmenter_vit-l_mask_8x1_640x640_160k_ade20k/segmenter_vit-l_mask_8x1_640x640_160k_ade20k_20220614_024513-4783a347.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segmenter/segmenter_vit-b_mask_8x1_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segmenter/segmenter_vit-b_mask_8x1_512x512_160k_ade20k.py new file mode 100644 index 0000000..766a99f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segmenter/segmenter_vit-b_mask_8x1_512x512_160k_ade20k.py @@ -0,0 +1,43 @@ +_base_ = [ + '../_base_/models/segmenter_vit-b16_mask.py', + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +optimizer = dict(lr=0.001, weight_decay=0.0) + +img_norm_cfg = dict( + mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 512), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + # num_gpus: 8 -> batch_size: 8 + samples_per_gpu=1, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segmenter/segmenter_vit-l_mask_8x1_640x640_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segmenter/segmenter_vit-l_mask_8x1_640x640_160k_ade20k.py new file mode 100644 index 0000000..4e6a0b1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segmenter/segmenter_vit-l_mask_8x1_640x640_160k_ade20k.py @@ -0,0 +1,61 @@ +_base_ = [ + '../_base_/models/segmenter_vit-b16_mask.py', + '../_base_/datasets/ade20k_640x640.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segmenter/vit_large_p16_384_20220308-d4efb41d.pth' # noqa + +model = dict( + pretrained=checkpoint, + backbone=dict( + type='VisionTransformer', + img_size=(640, 640), + embed_dims=1024, + num_layers=24, + num_heads=16), + decode_head=dict( + type='SegmenterMaskTransformerHead', + in_channels=1024, + channels=1024, 
+ num_heads=16, + embed_dims=1024), + test_cfg=dict(mode='slide', crop_size=(640, 640), stride=(608, 608))) + +optimizer = dict(lr=0.001, weight_decay=0.0) + +img_norm_cfg = dict( + mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True) +crop_size = (640, 640) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='Resize', img_scale=(2560, 640), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2560, 640), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + # num_gpus: 8 -> batch_size: 8 + samples_per_gpu=1, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segmenter/segmenter_vit-s_linear_8x1_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segmenter/segmenter_vit-s_linear_8x1_512x512_160k_ade20k.py new file mode 100644 index 0000000..adc8c1b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segmenter/segmenter_vit-s_linear_8x1_512x512_160k_ade20k.py @@ -0,0 +1,14 @@ +_base_ = './segmenter_vit-s_mask_8x1_512x512_160k_ade20k.py' + +model = dict( + decode_head=dict( + _delete_=True, + type='FCNHead', + in_channels=384, + channels=384, + num_convs=0, + dropout_ratio=0.0, + concat_input=False, + num_classes=150, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k.py new file mode 100644 index 0000000..7e0eeb1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segmenter/segmenter_vit-s_mask_8x1_512x512_160k_ade20k.py @@ -0,0 +1,66 @@ +_base_ = [ + '../_base_/models/segmenter_vit-b16_mask.py', + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segmenter/vit_small_p16_384_20220308-410f6037.pth' # noqa + +backbone_norm_cfg = dict(type='LN', eps=1e-6, requires_grad=True) +model = dict( + pretrained=checkpoint, + backbone=dict( + img_size=(512, 512), + embed_dims=384, + num_heads=6, + ), + decode_head=dict( + type='SegmenterMaskTransformerHead', + in_channels=384, + channels=384, + num_classes=150, + num_layers=2, + num_heads=6, + embed_dims=384, + dropout_ratio=0.0, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))) + +optimizer = dict(lr=0.001, weight_decay=0.0) + +img_norm_cfg = dict( + mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + 
dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 512), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + # num_gpus: 8 -> batch_size: 8 + samples_per_gpu=1, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k.py new file mode 100644 index 0000000..ec0107d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/segmenter/segmenter_vit-t_mask_8x1_512x512_160k_ade20k.py @@ -0,0 +1,56 @@ +_base_ = [ + '../_base_/models/segmenter_vit-b16_mask.py', + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segmenter/vit_tiny_p16_384_20220308-cce8c795.pth' # noqa + +model = dict( + pretrained=checkpoint, + backbone=dict(embed_dims=192, num_heads=3), + decode_head=dict( + type='SegmenterMaskTransformerHead', + in_channels=192, + channels=192, + num_heads=3, + embed_dims=192)) + +optimizer = dict(lr=0.001, weight_decay=0.0) + +img_norm_cfg = dict( + mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True) +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations', reduce_zero_label=True), + dict(type='Resize', img_scale=(2048, 512), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 512), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) +] +data = dict( + # num_gpus: 8 -> batch_size: 8 + samples_per_gpu=1, + train=dict(pipeline=train_pipeline), + val=dict(pipeline=test_pipeline), + test=dict(pipeline=test_pipeline)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/sem_fpn/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/sem_fpn/README.md new file mode 100644 index 0000000..054d5db --- /dev/null +++ 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/sem_fpn/README.md @@ -0,0 +1,51 @@ +# Semantic FPN + +[Panoptic Feature Pyramid Networks](https://arxiv.org/abs/1901.02446) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +The recently introduced panoptic segmentation task has renewed our community's interest in unifying the tasks of instance segmentation (for thing classes) and semantic segmentation (for stuff classes). However, current state-of-the-art methods for this joint task use separate and dissimilar networks for instance and semantic segmentation, without performing any shared computation. In this work, we aim to unify these methods at the architectural level, designing a single network for both tasks. Our approach is to endow Mask R-CNN, a popular instance segmentation method, with a semantic segmentation branch using a shared Feature Pyramid Network (FPN) backbone. Surprisingly, this simple baseline not only remains effective for instance segmentation, but also yields a lightweight, top-performing method for semantic segmentation. In this work, we perform a detailed study of this minimally extended version of Mask R-CNN with FPN, which we refer to as Panoptic FPN, and show it is a robust and accurate baseline for both tasks. Given its effectiveness and conceptual simplicity, we hope our method can serve as a strong baseline and aid future research in panoptic segmentation. + + + +
+ +
+ +## Citation + +```bibtex +@inproceedings{kirillov2019panoptic, + title={Panoptic feature pyramid networks}, + author={Kirillov, Alexander and Girshick, Ross and He, Kaiming and Doll{\'a}r, Piotr}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={6399--6408}, + year={2019} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ---------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| FPN | R-50 | 512x1024 | 80000 | 2.8 | 13.54 | 74.52 | 76.08 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/sem_fpn/fpn_r50_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x1024_80k_cityscapes/fpn_r50_512x1024_80k_cityscapes_20200717_021437-94018a0d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x1024_80k_cityscapes/fpn_r50_512x1024_80k_cityscapes-20200717_021437.log.json) | +| FPN | R-101 | 512x1024 | 80000 | 3.9 | 10.29 | 75.80 | 77.40 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/sem_fpn/fpn_r101_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x1024_80k_cityscapes/fpn_r101_512x1024_80k_cityscapes_20200717_012416-c5800d4c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x1024_80k_cityscapes/fpn_r101_512x1024_80k_cityscapes-20200717_012416.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------ | -------- | --------- | ------: | -------: | -------------- | ----: | ------------- | ------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FPN | R-50 | 512x512 | 160000 | 4.9 | 55.77 | 37.49 | 39.09 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/sem_fpn/fpn_r50_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x512_160k_ade20k/fpn_r50_512x512_160k_ade20k_20200718_131734-5b5a6ab9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x512_160k_ade20k/fpn_r50_512x512_160k_ade20k-20200718_131734.log.json) | +| FPN | R-101 | 512x512 | 160000 | 5.9 | 40.58 | 39.35 | 40.72 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/sem_fpn/fpn_r101_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x512_160k_ade20k/fpn_r101_512x512_160k_ade20k_20200718_131734-306b5004.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x512_160k_ade20k/fpn_r101_512x512_160k_ade20k-20200718_131734.log.json) | diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/sem_fpn/fpn_r101_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/sem_fpn/fpn_r101_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..7f8710d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/sem_fpn/fpn_r101_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './fpn_r50_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/sem_fpn/fpn_r101_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/sem_fpn/fpn_r101_512x512_160k_ade20k.py new file mode 100644 index 0000000..2654096 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/sem_fpn/fpn_r101_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './fpn_r50_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/sem_fpn/fpn_r50_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/sem_fpn/fpn_r50_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..4bf3edd --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/sem_fpn/fpn_r50_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/fpn_r50.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/sem_fpn/fpn_r50_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/sem_fpn/fpn_r50_512x512_160k_ade20k.py new file mode 100644 index 0000000..5cdfc8c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/sem_fpn/fpn_r50_512x512_160k_ade20k.py @@ -0,0 +1,5 @@ +_base_ = [ + '../_base_/models/fpn_r50.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict(decode_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/sem_fpn/sem_fpn.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/sem_fpn/sem_fpn.yml new file mode 100644 index 0000000..d7ebdfe --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/sem_fpn/sem_fpn.yml @@ -0,0 +1,104 @@ +Collections: +- Name: FPN + Metadata: + Training Data: + - Cityscapes + - ADE20K + Paper: + URL: https://arxiv.org/abs/1901.02446 + Title: Panoptic Feature Pyramid Networks + README: configs/sem_fpn/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/fpn_head.py#L12 + Version: v0.17.0 + Converted From: + Code: https://github.com/facebookresearch/detectron2 +Models: +- Name: fpn_r50_512x1024_80k_cityscapes + In Collection: FPN + Metadata: + backbone: R-50 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 73.86 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 2.8 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.52 + mIoU(ms+flip): 76.08 + Config: configs/sem_fpn/fpn_r50_512x1024_80k_cityscapes.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x1024_80k_cityscapes/fpn_r50_512x1024_80k_cityscapes_20200717_021437-94018a0d.pth +- Name: fpn_r101_512x1024_80k_cityscapes + In Collection: FPN + Metadata: + backbone: R-101 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 97.18 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 3.9 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.8 + mIoU(ms+flip): 77.4 + Config: configs/sem_fpn/fpn_r101_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x1024_80k_cityscapes/fpn_r101_512x1024_80k_cityscapes_20200717_012416-c5800d4c.pth +- Name: fpn_r50_512x512_160k_ade20k + In Collection: FPN + Metadata: + backbone: R-50 + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 17.93 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 4.9 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 37.49 + mIoU(ms+flip): 39.09 + Config: configs/sem_fpn/fpn_r50_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r50_512x512_160k_ade20k/fpn_r50_512x512_160k_ade20k_20200718_131734-5b5a6ab9.pth +- Name: fpn_r101_512x512_160k_ade20k + In Collection: FPN + Metadata: + backbone: R-101 + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 24.64 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 5.9 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 39.35 + mIoU(ms+flip): 40.72 + Config: configs/sem_fpn/fpn_r101_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/sem_fpn/fpn_r101_512x512_160k_ade20k/fpn_r101_512x512_160k_ade20k_20200718_131734-306b5004.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/setr/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/setr/README.md new file mode 100644 index 0000000..5afd274 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/setr/README.md @@ -0,0 +1,74 @@ +# SETR + +[Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective with Transformers](https://arxiv.org/abs/2012.15840) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Most recent semantic segmentation methods adopt a fully-convolutional network (FCN) with an encoder-decoder architecture. The encoder progressively reduces the spatial resolution and learns more abstract/semantic visual concepts with larger receptive fields. Since context modeling is critical for segmentation, the latest efforts have been focused on increasing the receptive field, through either dilated/atrous convolutions or inserting attention modules. However, the encoder-decoder based FCN architecture remains unchanged. In this paper, we aim to provide an alternative perspective by treating semantic segmentation as a sequence-to-sequence prediction task. Specifically, we deploy a pure transformer (ie, without convolution and resolution reduction) to encode an image as a sequence of patches. With the global context modeled in every layer of the transformer, this encoder can be combined with a simple decoder to provide a powerful segmentation model, termed SEgmentation TRansformer (SETR). 
Extensive experiments show that SETR achieves new state of the art on ADE20K (50.28% mIoU), Pascal Context (55.83% mIoU) and competitive results on Cityscapes. Particularly, we achieve the first position in the highly competitive ADE20K test server leaderboard on the day of submission. + + + +
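+As a quick illustration of the abstract above: the image becomes a sequence of patch tokens, a pure transformer encoder models global context at every layer, and a simple decoder projects tokens to class logits and upsamples. The sketch below uses toy dimensions and module names of our own; it is not the MMSegmentation implementation.
+
+```python
+# Illustrative sketch of SETR's sequence-to-sequence idea (toy sizes; not mmseg code).
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+class ToySETR(nn.Module):
+    def __init__(self, img_size=512, patch=16, dim=256, num_classes=150):
+        super().__init__()
+        self.patch = patch
+        # Patch embedding: every 16x16 patch becomes one token.
+        self.embed = nn.Conv2d(3, dim, kernel_size=patch, stride=patch)
+        self.pos = nn.Parameter(torch.zeros(1, (img_size // patch) ** 2, dim))
+        layer = nn.TransformerEncoderLayer(d_model=dim, nhead=8, batch_first=True)
+        self.encoder = nn.TransformerEncoder(layer, num_layers=4)
+        # "Naive" decoder: 1x1 conv to class logits, then bilinear upsampling.
+        self.cls = nn.Conv2d(dim, num_classes, kernel_size=1)
+
+    def forward(self, x):
+        h, w = x.shape[-2] // self.patch, x.shape[-1] // self.patch
+        tokens = self.embed(x).flatten(2).transpose(1, 2) + self.pos  # (B, N, C)
+        tokens = self.encoder(tokens)  # global self-attention in every layer
+        b, n, c = tokens.shape
+        feat = tokens.transpose(1, 2).reshape(b, c, h, w)
+        return F.interpolate(self.cls(feat), scale_factor=self.patch,
+                             mode='bilinear', align_corners=False)
+
+logits = ToySETR()(torch.randn(1, 3, 512, 512))  # -> (1, 150, 512, 512)
+```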
+ +
+ +```None +This head has two versions. +``` + +## Citation + +```bibtex +@article{zheng2020rethinking, + title={Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective with Transformers}, + author={Zheng, Sixiao and Lu, Jiachen and Zhao, Hengshuang and Zhu, Xiatian and Luo, Zekun and Wang, Yabiao and Fu, Yanwei and Feng, Jianfeng and Xiang, Tao and Torr, Philip HS and others}, + journal={arXiv preprint arXiv:2012.15840}, + year={2020} +} +``` + +## Usage + +You can download the pretrained model from [here](https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_p16_384-b3be5167.pth). Then you can convert its keys with the script `vit2mmseg.py` in the tools directory. + +```shell +python tools/model_converters/vit2mmseg.py ${PRETRAIN_PATH} ${STORE_PATH} +``` + +E.g. + +```shell +python tools/model_converters/vit2mmseg.py \ +jx_vit_large_p16_384-b3be5167.pth pretrain/vit_large_p16.pth +``` + +This script converts the model from `PRETRAIN_PATH` and stores the converted model in `STORE_PATH`. + +## Results and models + +### ADE20K + +| Method | Backbone | Crop Size | Batch Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ---------- | ------- | -------- | -------------- | ----- | ------------: | ------ | -------- | +| SETR Naive | ViT-L | 512x512 | 16 | 160000 | 18.40 | 4.72 | 48.28 | 49.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/setr/setr_naive_512x512_160k_b16_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_512x512_160k_b16_ade20k/setr_naive_512x512_160k_b16_ade20k_20210619_191258-061f24f5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_512x512_160k_b16_ade20k/setr_naive_512x512_160k_b16_ade20k_20210619_191258.log.json) | +| SETR PUP | ViT-L | 512x512 | 16 | 160000 | 19.54 | 4.50 | 48.24 | 49.99 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/setr/setr_pup_512x512_160k_b16_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_512x512_160k_b16_ade20k/setr_pup_512x512_160k_b16_ade20k_20210619_191343-7e0ce826.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_512x512_160k_b16_ade20k/setr_pup_512x512_160k_b16_ade20k_20210619_191343.log.json) | +| SETR MLA | ViT-L | 512x512 | 8 | 160000 | 10.96 | - | 47.34 | 49.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/setr/setr_mla_512x512_160k_b8_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b8_ade20k/setr_mla_512x512_160k_b8_ade20k_20210619_191118-c6d21df0.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b8_ade20k/setr_mla_512x512_160k_b8_ade20k_20210619_191118.log.json) | +| SETR MLA | ViT-L | 512x512 | 16 | 160000 | 17.30 | 5.25 | 47.54 | 49.37 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/setr/setr_mla_512x512_160k_b16_ade20k.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b16_ade20k/setr_mla_512x512_160k_b16_ade20k_20210619_191057-f9741de7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b16_ade20k/setr_mla_512x512_160k_b16_ade20k_20210619_191057.log.json) | + +### Cityscapes + +| Method | Backbone | Crop Size | Batch Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ---------- | -------- | --------- | ---------- | ------- | -------- | -------------- | ----- | ------------: | ---------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| SETR Naive | ViT-L | 768x768 | 8 | 80000 | 24.06 | 0.39 | 78.10 | 80.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/setr/setr_vit-large_naive_8x1_768x768_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_vit-large_8x1_768x768_80k_cityscapes/setr_naive_vit-large_8x1_768x768_80k_cityscapes_20211123_000505-20728e80.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_vit-large_8x1_768x768_80k_cityscapes/setr_naive_vit-large_8x1_768x768_80k_cityscapes_20211123_000505.log.json) | +| SETR PUP | ViT-L | 768x768 | 8 | 80000 | 27.96 | 0.37 | 79.21 | 81.02 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/setr/setr_vit-large_pup_8x1_768x768_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_vit-large_8x1_768x768_80k_cityscapes/setr_pup_vit-large_8x1_768x768_80k_cityscapes_20211122_155115-f6f37b8f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_vit-large_8x1_768x768_80k_cityscapes/setr_pup_vit-large_8x1_768x768_80k_cityscapes_20211122_155115.log.json) | +| SETR MLA | ViT-L | 768x768 | 8 | 80000 | 24.10 | 0.41 | 77.00 | 79.59 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/setr/setr_vit-large_mla_8x1_768x768_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_vit-large_8x1_768x768_80k_cityscapes/setr_mla_vit-large_8x1_768x768_80k_cityscapes_20211119_101003-7f8dccbe.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_vit-large_8x1_768x768_80k_cityscapes/setr_mla_vit-large_8x1_768x768_80k_cityscapes_20211119_101003.log.json) | diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/setr/setr.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/setr/setr.yml new file mode 100644 index 0000000..27f58e4 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/setr/setr.yml @@ -0,0 +1,164 @@ +Collections: +- Name: SETR + Metadata: + Training Data: + - ADE20K + - Cityscapes + Paper: + URL: https://arxiv.org/abs/2012.15840 + Title: Rethinking Semantic Segmentation from a Sequence-to-Sequence Perspective + with Transformers + README: configs/setr/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/setr_up_head.py#L11 + Version: v0.17.0 + 
Converted From: + Code: https://github.com/fudan-zvg/SETR +Models: +- Name: setr_naive_512x512_160k_b16_ade20k + In Collection: SETR + Metadata: + backbone: ViT-L + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 211.86 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 18.4 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 48.28 + mIoU(ms+flip): 49.56 + Config: configs/setr/setr_naive_512x512_160k_b16_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_512x512_160k_b16_ade20k/setr_naive_512x512_160k_b16_ade20k_20210619_191258-061f24f5.pth +- Name: setr_pup_512x512_160k_b16_ade20k + In Collection: SETR + Metadata: + backbone: ViT-L + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 222.22 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 19.54 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 48.24 + mIoU(ms+flip): 49.99 + Config: configs/setr/setr_pup_512x512_160k_b16_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_512x512_160k_b16_ade20k/setr_pup_512x512_160k_b16_ade20k_20210619_191343-7e0ce826.pth +- Name: setr_mla_512x512_160k_b8_ade20k + In Collection: SETR + Metadata: + backbone: ViT-L + crop size: (512,512) + lr schd: 160000 + Training Memory (GB): 10.96 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 47.34 + mIoU(ms+flip): 49.05 + Config: configs/setr/setr_mla_512x512_160k_b8_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b8_ade20k/setr_mla_512x512_160k_b8_ade20k_20210619_191118-c6d21df0.pth +- Name: setr_mla_512x512_160k_b16_ade20k + In Collection: SETR + Metadata: + backbone: ViT-L + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 190.48 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 17.3 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 47.54 + mIoU(ms+flip): 49.37 + Config: configs/setr/setr_mla_512x512_160k_b16_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_512x512_160k_b16_ade20k/setr_mla_512x512_160k_b16_ade20k_20210619_191057-f9741de7.pth +- Name: setr_vit-large_naive_8x1_768x768_80k_cityscapes + In Collection: SETR + Metadata: + backbone: ViT-L + crop size: (768,768) + lr schd: 80000 + inference time (ms/im): + - value: 2564.1 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (768,768) + Training Memory (GB): 24.06 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.1 + mIoU(ms+flip): 80.22 + Config: configs/setr/setr_vit-large_naive_8x1_768x768_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_naive_vit-large_8x1_768x768_80k_cityscapes/setr_naive_vit-large_8x1_768x768_80k_cityscapes_20211123_000505-20728e80.pth +- Name: setr_vit-large_pup_8x1_768x768_80k_cityscapes + In Collection: SETR + Metadata: + backbone: ViT-L + crop size: (768,768) + lr schd: 80000 + inference time (ms/im): + - value: 2702.7 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (768,768) + Training Memory (GB): 27.96 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.21 + mIoU(ms+flip): 
81.02 + Config: configs/setr/setr_vit-large_pup_8x1_768x768_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_pup_vit-large_8x1_768x768_80k_cityscapes/setr_pup_vit-large_8x1_768x768_80k_cityscapes_20211122_155115-f6f37b8f.pth +- Name: setr_vit-large_mla_8x1_768x768_80k_cityscapes + In Collection: SETR + Metadata: + backbone: ViT-L + crop size: (768,768) + lr schd: 80000 + inference time (ms/im): + - value: 2439.02 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (768,768) + Training Memory (GB): 24.1 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.0 + mIoU(ms+flip): 79.59 + Config: configs/setr/setr_vit-large_mla_8x1_768x768_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/setr/setr_mla_vit-large_8x1_768x768_80k_cityscapes/setr_mla_vit-large_8x1_768x768_80k_cityscapes_20211119_101003-7f8dccbe.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/setr/setr_mla_512x512_160k_b16_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/setr/setr_mla_512x512_160k_b16_ade20k.py new file mode 100644 index 0000000..c8418c6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/setr/setr_mla_512x512_160k_b16_ade20k.py @@ -0,0 +1,4 @@ +_base_ = ['./setr_mla_512x512_160k_b8_ade20k.py'] + +# num_gpus: 8 -> batch_size: 16 +data = dict(samples_per_gpu=2) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/setr/setr_mla_512x512_160k_b8_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/setr/setr_mla_512x512_160k_b8_ade20k.py new file mode 100644 index 0000000..e1a07ce --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/setr/setr_mla_512x512_160k_b8_ade20k.py @@ -0,0 +1,85 @@ +_base_ = [ + '../_base_/models/setr_mla.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained=None, + backbone=dict( + img_size=(512, 512), + drop_rate=0., + init_cfg=dict( + type='Pretrained', checkpoint='pretrain/vit_large_p16.pth')), + decode_head=dict(num_classes=150), + auxiliary_head=[ + dict( + type='FCNHead', + in_channels=256, + channels=256, + in_index=0, + dropout_ratio=0, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + num_convs=0, + kernel_size=1, + concat_input=False, + num_classes=150, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='FCNHead', + in_channels=256, + channels=256, + in_index=1, + dropout_ratio=0, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + num_convs=0, + kernel_size=1, + concat_input=False, + num_classes=150, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='FCNHead', + in_channels=256, + channels=256, + in_index=2, + dropout_ratio=0, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + num_convs=0, + kernel_size=1, + concat_input=False, + num_classes=150, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='FCNHead', + in_channels=256, + channels=256, + in_index=3, + dropout_ratio=0, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + num_convs=0, + kernel_size=1, + concat_input=False, + num_classes=150, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, 
loss_weight=0.4)), + ], + test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(341, 341)), +) + +optimizer = dict( + lr=0.001, + weight_decay=0.0, + paramwise_cfg=dict(custom_keys={'head': dict(lr_mult=10.)})) + +# num_gpus: 8 -> batch_size: 8 +data = dict(samples_per_gpu=1) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/setr/setr_naive_512x512_160k_b16_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/setr/setr_naive_512x512_160k_b16_ade20k.py new file mode 100644 index 0000000..8ad8c9f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/setr/setr_naive_512x512_160k_b16_ade20k.py @@ -0,0 +1,67 @@ +_base_ = [ + '../_base_/models/setr_naive.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained=None, + backbone=dict( + img_size=(512, 512), + drop_rate=0., + init_cfg=dict( + type='Pretrained', checkpoint='pretrain/vit_large_p16.pth')), + decode_head=dict(num_classes=150), + auxiliary_head=[ + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=0, + num_classes=150, + dropout_ratio=0, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + num_convs=2, + kernel_size=1, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=1, + num_classes=150, + dropout_ratio=0, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + num_convs=2, + kernel_size=1, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=2, + num_classes=150, + dropout_ratio=0, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + num_convs=2, + kernel_size=1, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)) + ], + test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(341, 341)), +) + +optimizer = dict( + lr=0.01, + weight_decay=0.0, + paramwise_cfg=dict(custom_keys={'head': dict(lr_mult=10.)})) + +# num_gpus: 8 -> batch_size: 16 +data = dict(samples_per_gpu=2) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/setr/setr_pup_512x512_160k_b16_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/setr/setr_pup_512x512_160k_b16_ade20k.py new file mode 100644 index 0000000..83997a2 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/setr/setr_pup_512x512_160k_b16_ade20k.py @@ -0,0 +1,67 @@ +_base_ = [ + '../_base_/models/setr_pup.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained=None, + backbone=dict( + img_size=(512, 512), + drop_rate=0., + init_cfg=dict( + type='Pretrained', checkpoint='pretrain/vit_large_p16.pth')), + decode_head=dict(num_classes=150), + auxiliary_head=[ + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=0, + num_classes=150, + dropout_ratio=0, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + num_convs=2, + kernel_size=3, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=1, + num_classes=150, + dropout_ratio=0, + norm_cfg=norm_cfg, + 
act_cfg=dict(type='ReLU'), + num_convs=2, + kernel_size=3, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=2, + num_classes=150, + dropout_ratio=0, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + num_convs=2, + kernel_size=3, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + ], + test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(341, 341)), +) + +optimizer = dict( + lr=0.001, + weight_decay=0.0, + paramwise_cfg=dict(custom_keys={'head': dict(lr_mult=10.)})) + +# num_gpus: 8 -> batch_size: 16 +data = dict(samples_per_gpu=2) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/setr/setr_vit-large_mla_8x1_768x768_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/setr/setr_vit-large_mla_8x1_768x768_80k_cityscapes.py new file mode 100644 index 0000000..4237cd5 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/setr/setr_vit-large_mla_8x1_768x768_80k_cityscapes.py @@ -0,0 +1,17 @@ +_base_ = [ + '../_base_/models/setr_mla.py', '../_base_/datasets/cityscapes_768x768.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + pretrained=None, + backbone=dict( + drop_rate=0, + init_cfg=dict( + type='Pretrained', checkpoint='pretrain/vit_large_p16.pth')), + test_cfg=dict(mode='slide', crop_size=(768, 768), stride=(512, 512))) + +optimizer = dict( + lr=0.002, + weight_decay=0.0, + paramwise_cfg=dict(custom_keys={'head': dict(lr_mult=10.)})) +data = dict(samples_per_gpu=1) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/setr/setr_vit-large_naive_8x1_768x768_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/setr/setr_vit-large_naive_8x1_768x768_80k_cityscapes.py new file mode 100644 index 0000000..0c6621e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/setr/setr_vit-large_naive_8x1_768x768_80k_cityscapes.py @@ -0,0 +1,18 @@ +_base_ = [ + '../_base_/models/setr_naive.py', + '../_base_/datasets/cityscapes_768x768.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + pretrained=None, + backbone=dict( + drop_rate=0., + init_cfg=dict( + type='Pretrained', checkpoint='pretrain/vit_large_p16.pth')), + test_cfg=dict(mode='slide', crop_size=(768, 768), stride=(512, 512))) + +optimizer = dict( + weight_decay=0.0, + paramwise_cfg=dict(custom_keys={'head': dict(lr_mult=10.)})) + +data = dict(samples_per_gpu=1) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/setr/setr_vit-large_pup_8x1_768x768_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/setr/setr_vit-large_pup_8x1_768x768_80k_cityscapes.py new file mode 100644 index 0000000..e108988 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/setr/setr_vit-large_pup_8x1_768x768_80k_cityscapes.py @@ -0,0 +1,64 @@ +_base_ = [ + '../_base_/models/setr_pup.py', '../_base_/datasets/cityscapes_768x768.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] + +norm_cfg = dict(type='SyncBN', requires_grad=True) +crop_size = (768, 768) +model = dict( + pretrained=None, + backbone=dict( + drop_rate=0., + init_cfg=dict( + type='Pretrained', checkpoint='pretrain/vit_large_p16.pth')), + auxiliary_head=[ + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + 
in_index=0, + num_classes=19, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=2, + up_scale=4, + kernel_size=3, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=1, + num_classes=19, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=2, + up_scale=4, + kernel_size=3, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + dict( + type='SETRUPHead', + in_channels=1024, + channels=256, + in_index=2, + num_classes=19, + dropout_ratio=0, + norm_cfg=norm_cfg, + num_convs=2, + up_scale=4, + kernel_size=3, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)) + ], + test_cfg=dict(mode='slide', crop_size=crop_size, stride=(512, 512))) + +optimizer = dict( + weight_decay=0.0, + paramwise_cfg=dict(custom_keys={'head': dict(lr_mult=10.)})) + +data = dict(samples_per_gpu=1) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/stdc/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/stdc/README.md new file mode 100644 index 0000000..1c6d70a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/stdc/README.md @@ -0,0 +1,73 @@ +# STDC + +[Rethinking BiSeNet For Real-time Semantic Segmentation](https://arxiv.org/abs/2104.13188) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +BiSeNet has been proved to be a popular two-stream network for real-time segmentation. However, its principle of adding an extra path to encode spatial information is time-consuming, and the backbones borrowed from pretrained tasks, e.g., image classification, may be inefficient for image segmentation due to the deficiency of task-specific design. To handle these problems, we propose a novel and efficient structure named Short-Term Dense Concatenate network (STDC network) by removing structure redundancy. Specifically, we gradually reduce the dimension of feature maps and use the aggregation of them for image representation, which forms the basic module of STDC network. In the decoder, we propose a Detail Aggregation module by integrating the learning of spatial information into low-level layers in single-stream manner. Finally, the low-level features and deep features are fused to predict the final segmentation results. Extensive experiments on Cityscapes and CamVid dataset demonstrate the effectiveness of our method by achieving promising trade-off between segmentation accuracy and inference speed. On Cityscapes, we achieve 71.9% mIoU on the test set with a speed of 250.4 FPS on NVIDIA GTX 1080Ti, which is 45.2% faster than the latest methods, and achieve 76.8% mIoU with 97.0 FPS while inferring on higher resolution images. + + + +
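+As a rough illustration of the Short-Term Dense Concatenate idea above (channel widths are halved stage by stage and every stage's output is concatenated into the representation), here is a minimal PyTorch-style sketch. The widths and names are our own assumptions, not the MMSegmentation implementation:
+
+```python
+# Illustrative STDC-style block (assumed widths; not mmseg code).
+import torch
+import torch.nn as nn
+
+class ToySTDCModule(nn.Module):
+    """Halve channels at each conv, then concatenate all stage outputs."""
+    def __init__(self, in_ch=64, out_ch=256, num_convs=4):
+        super().__init__()
+        self.convs = nn.ModuleList()
+        ch_in = in_ch
+        for i in range(num_convs):
+            # Widths for out_ch=256: 128, 64, 32, 32 (the last width repeats).
+            ch_out = out_ch // (2 ** min(i + 1, num_convs - 1))
+            self.convs.append(nn.Sequential(
+                nn.Conv2d(ch_in, ch_out, 3, padding=1, bias=False),
+                nn.BatchNorm2d(ch_out),
+                nn.ReLU(inplace=True)))
+            ch_in = ch_out
+
+    def forward(self, x):
+        outs = []
+        for conv in self.convs:
+            x = conv(x)
+            outs.append(x)
+        # The concatenation of all scales forms the block's representation.
+        return torch.cat(outs, dim=1)
+
+feat = ToySTDCModule()(torch.randn(1, 64, 64, 64))  # -> (1, 256, 64, 64)
+```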
+ +
+ +## Citation + +```bibtex +@inproceedings{fan2021rethinking, + title={Rethinking BiSeNet For Real-time Semantic Segmentation}, + author={Fan, Mingyuan and Lai, Shenqi and Huang, Junshi and Wei, Xiaoming and Chai, Zhenhua and Luo, Junfeng and Wei, Xiaolin}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={9716--9725}, + year={2021} +} +``` + +## Usage + +We have provided [ImageNet-pretrained STDCNet weights](https://drive.google.com/drive/folders/1wROFwRt8qWHD4jSo8Zu1gp1d6oYJ3ns1) converted from the [official repo](https://github.com/MichaelFan01/STDC-Seg). + +If you want to convert keys on your own to use official repositories' pre-trained models, we also provide a script [`stdc2mmseg.py`](../../tools/model_converters/stdc2mmseg.py) in the tools directory to convert the keys of models from [the official repo](https://github.com/MichaelFan01/STDC-Seg) to MMSegmentation style. + +```shell +python tools/model_converters/stdc2mmseg.py ${PRETRAIN_PATH} ${STORE_PATH} ${STDC_TYPE} +``` + +E.g. + +```shell +python tools/model_converters/stdc2mmseg.py ./STDCNet813M_73.91.tar ./pretrained/stdc1.pth STDC1 + +python tools/model_converters/stdc2mmseg.py ./STDCNet1446_76.47.tar ./pretrained/stdc2.pth STDC2 +``` + +This script converts a model from `PRETRAIN_PATH` and stores the converted model in `STORE_PATH`. + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| -------------------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------- | ------ | -------- | +| STDC 1 (No Pretrain) | STDC1 | 512x1024 | 80000 | 7.15 | 23.06 | 71.82 | 73.89 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/stdc/stdc1_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc1_512x1024_80k_cityscapes/stdc1_512x1024_80k_cityscapes_20220224_073048-74e6920a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc1_512x1024_80k_cityscapes/stdc1_512x1024_80k_cityscapes_20220224_073048.log.json) | +| STDC 1 | STDC1 | 512x1024 | 80000 | - | - | 74.94 | 76.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/stdc/stdc1_in1k-pre_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc1_in1k-pre_512x1024_80k_cityscapes/stdc1_in1k-pre_512x1024_80k_cityscapes_20220224_141648-3d4c2981.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc1_in1k-pre_512x1024_80k_cityscapes/stdc1_in1k-pre_512x1024_80k_cityscapes_20220224_141648.log.json) | +| STDC 2 (No Pretrain) | STDC2 | 512x1024 | 80000 | 8.27 | 23.71 | 73.15 | 76.13 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/stdc/stdc2_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc2_512x1024_80k_cityscapes/stdc2_512x1024_80k_cityscapes_20220222_132015-fb1e3a1a.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc2_512x1024_80k_cityscapes/stdc2_512x1024_80k_cityscapes_20220222_132015.log.json) | +| STDC 2 | STDC2 | 512x1024 | 80000 | - | - | 76.67 | 78.67 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/stdc/stdc2_in1k-pre_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc2_in1k-pre_512x1024_80k_cityscapes/stdc2_in1k-pre_512x1024_80k_cityscapes_20220224_073048-1f8f0f6c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc2_in1k-pre_512x1024_80k_cityscapes/stdc2_in1k-pre_512x1024_80k_cityscapes_20220224_073048.log.json) | + +Note: + +- For STDC on the Cityscapes dataset, the default setting is 4 GPUs with 12 samples per GPU during training. +- `No Pretrain` means the model is trained from scratch. +- The FPS is for reference only; the measurement environment also differs from the paper's setting, which uses TensorRT with input sizes of `512x1024` and `768x1536`, i.e., 50% and 75% of our input size, respectively. +- The parameter `fusion_kernel` in `STDCHead` is not learnable. In the official repo, `find_unused_parameters=True` is set [here](https://github.com/MichaelFan01/STDC-Seg/blob/59ff37fbd693b99972c76fcefe97caa14aeb619f/train.py#L220). You can check this by printing the model parameters of the original repo yourself. diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/stdc/stdc.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/stdc/stdc.yml new file mode 100644 index 0000000..f584b74 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/stdc/stdc.yml @@ -0,0 +1,87 @@ +Collections: +- Name: STDC + Metadata: + Training Data: + - Cityscapes + Paper: + URL: https://arxiv.org/abs/2104.13188 + Title: Rethinking BiSeNet For Real-time Semantic Segmentation + README: configs/stdc/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.20.0/mmseg/models/backbones/stdc.py#L394 + Version: v0.20.0 + Converted From: + Code: https://github.com/MichaelFan01/STDC-Seg +Models: +- Name: stdc1_512x1024_80k_cityscapes + In Collection: STDC + Metadata: + backbone: STDC1 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 43.37 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 7.15 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 71.82 + mIoU(ms+flip): 73.89 + Config: configs/stdc/stdc1_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc1_512x1024_80k_cityscapes/stdc1_512x1024_80k_cityscapes_20220224_073048-74e6920a.pth +- Name: stdc1_in1k-pre_512x1024_80k_cityscapes + In Collection: STDC + Metadata: + backbone: STDC1 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 74.94 + mIoU(ms+flip): 76.97 + Config: configs/stdc/stdc1_in1k-pre_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc1_in1k-pre_512x1024_80k_cityscapes/stdc1_in1k-pre_512x1024_80k_cityscapes_20220224_141648-3d4c2981.pth +- Name: stdc2_512x1024_80k_cityscapes + In Collection: STDC + Metadata: + backbone: STDC2 + crop size: (512,1024) + lr schd: 80000 + inference time (ms/im): + - value: 42.18 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 8.27 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes
+ Metrics: + mIoU: 73.15 + mIoU(ms+flip): 76.13 + Config: configs/stdc/stdc2_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc2_512x1024_80k_cityscapes/stdc2_512x1024_80k_cityscapes_20220222_132015-fb1e3a1a.pth +- Name: stdc2_in1k-pre_512x1024_80k_cityscapes + In Collection: STDC + Metadata: + backbone: STDC2 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.67 + mIoU(ms+flip): 78.67 + Config: configs/stdc/stdc2_in1k-pre_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/stdc/stdc2_in1k-pre_512x1024_80k_cityscapes/stdc2_in1k-pre_512x1024_80k_cityscapes_20220224_073048-1f8f0f6c.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/stdc/stdc1_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/stdc/stdc1_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..849e771 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/stdc/stdc1_512x1024_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/stdc.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +lr_config = dict(warmup='linear', warmup_iters=1000) +data = dict( + samples_per_gpu=12, + workers_per_gpu=4, +) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/stdc/stdc1_in1k-pre_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/stdc/stdc1_in1k-pre_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..f295bf4 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/stdc/stdc1_in1k-pre_512x1024_80k_cityscapes.py @@ -0,0 +1,6 @@ +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/stdc/stdc1_20220308-5368626c.pth' # noqa +_base_ = './stdc1_512x1024_80k_cityscapes.py' +model = dict( + backbone=dict( + backbone_cfg=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint)))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/stdc/stdc2_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/stdc/stdc2_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..f7afb50 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/stdc/stdc2_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './stdc1_512x1024_80k_cityscapes.py' +model = dict(backbone=dict(backbone_cfg=dict(stdc_type='STDCNet2'))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/stdc/stdc2_in1k-pre_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/stdc/stdc2_in1k-pre_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..4148ac4 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/stdc/stdc2_in1k-pre_512x1024_80k_cityscapes.py @@ -0,0 +1,6 @@ +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/stdc/stdc2_20220308-7dbd9127.pth' # noqa +_base_ = './stdc2_512x1024_80k_cityscapes.py' +model = dict( + backbone=dict( + backbone_cfg=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint)))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/swin/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/swin/README.md new file mode 100644 index 0000000..6b21b6d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/swin/README.md @@ -0,0 +1,76 @@ +# Swin 
Transformer + +[Swin Transformer: Hierarchical Vision Transformer using Shifted Windows](https://arxiv.org/abs/2103.14030) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +This paper presents a new vision Transformer, called Swin Transformer, that capably serves as a general-purpose backbone for computer vision. Challenges in adapting Transformer from language to vision arise from differences between the two domains, such as large variations in the scale of visual entities and the high resolution of pixels in images compared to words in text. To address these differences, we propose a hierarchical Transformer whose representation is computed with Shifted windows. The shifted windowing scheme brings greater efficiency by limiting self-attention computation to non-overlapping local windows while also allowing for cross-window connection. This hierarchical architecture has the flexibility to model at various scales and has linear computational complexity with respect to image size. These qualities of Swin Transformer make it compatible with a broad range of vision tasks, including image classification (87.3 top-1 accuracy on ImageNet-1K) and dense prediction tasks such as object detection (58.7 box AP and 51.1 mask AP on COCO test-dev) and semantic segmentation (53.5 mIoU on ADE20K val). Its performance surpasses the previous state-of-the-art by a large margin of +2.7 box AP and +2.6 mask AP on COCO, and +3.2 mIoU on ADE20K, demonstrating the potential of Transformer-based models as vision backbones. The hierarchical design and the shifted window approach also prove beneficial for all-MLP architectures. The code and models are publicly available at [this https URL](https://github.com/microsoft/Swin-Transformer). + + + +
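+To make the shifted-window scheme above concrete, here is a minimal PyTorch-style sketch: attention runs inside non-overlapping windows, and alternating blocks shift the feature map by half a window via `torch.roll` to connect neighbouring windows. Sizes are assumptions, the attention mask the real model applies at shifted borders is omitted for brevity, and this is not the MMSegmentation implementation:
+
+```python
+# Illustrative (shifted) window attention (assumed sizes; not mmseg code).
+import torch
+import torch.nn as nn
+
+def window_partition(x, ws):
+    """(B, H, W, C) -> (num_windows*B, ws*ws, C) non-overlapping windows."""
+    B, H, W, C = x.shape
+    x = x.view(B, H // ws, ws, W // ws, ws, C)
+    return x.permute(0, 1, 3, 2, 4, 5).reshape(-1, ws * ws, C)
+
+def window_reverse(win, ws, H, W):
+    """Inverse of window_partition: windows back to (B, H, W, C)."""
+    B = win.shape[0] // ((H // ws) * (W // ws))
+    x = win.view(B, H // ws, W // ws, ws, ws, -1)
+    return x.permute(0, 1, 3, 2, 4, 5).reshape(B, H, W, -1)
+
+class ToyWindowAttention(nn.Module):
+    def __init__(self, dim=96, heads=3, ws=7, shift=0):
+        super().__init__()
+        self.ws, self.shift = ws, shift
+        self.attn = nn.MultiheadAttention(dim, heads, batch_first=True)
+
+    def forward(self, x):                       # x: (B, H, W, C)
+        if self.shift:                          # shifted block: roll by half a window
+            x = torch.roll(x, (-self.shift, -self.shift), dims=(1, 2))
+        H, W = x.shape[1], x.shape[2]
+        win = window_partition(x, self.ws)      # cost stays linear in H*W
+        win, _ = self.attn(win, win, win)
+        x = window_reverse(win, self.ws, H, W)
+        if self.shift:
+            x = torch.roll(x, (self.shift, self.shift), dims=(1, 2))
+        return x
+
+x = torch.randn(1, 56, 56, 96)
+y = ToyWindowAttention(shift=3)(x)              # alternating blocks use shift=ws//2
+```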
+ +
+ +## Citation + +```bibtex +@article{liu2021Swin, + title={Swin Transformer: Hierarchical Vision Transformer using Shifted Windows}, + author={Liu, Ze and Lin, Yutong and Cao, Yue and Hu, Han and Wei, Yixuan and Zhang, Zheng and Lin, Stephen and Guo, Baining}, + journal={arXiv preprint arXiv:2103.14030}, + year={2021} +} +``` + +## Usage + +We have provided pretrained models converted from the [official repo](https://github.com/microsoft/Swin-Transformer). + +If you want to convert keys on your own to use official repositories' pre-trained models, we also provide a script [`swin2mmseg.py`](../../tools/model_converters/swin2mmseg.py) in the tools directory to convert the keys of models from [the official repo](https://github.com/SwinTransformer/Swin-Transformer-Semantic-Segmentation) to MMSegmentation style. + +```shell +python tools/model_converters/swin2mmseg.py ${PRETRAIN_PATH} ${STORE_PATH} +``` + +E.g. + +```shell +python tools/model_converters/swin2mmseg.py https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224.pth pretrain/swin_base_patch4_window7_224.pth +``` + +This script converts the model from `PRETRAIN_PATH` and stores the converted model in `STORE_PATH`. + +In our default setting, the pretrained models and their corresponding [original models](https://github.com/microsoft/Swin-Transformer) are listed below: + +| pretrained models | original models | +| ---------------------------------------------- | --------------- | +| pretrain/swin_tiny_patch4_window7_224.pth | [swin_tiny_patch4_window7_224.pth](https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth) | +| pretrain/swin_small_patch4_window7_224.pth | [swin_small_patch4_window7_224.pth](https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth) | +| pretrain/swin_base_patch4_window7_224.pth | [swin_base_patch4_window7_224.pth](https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224.pth) | +| pretrain/swin_base_patch4_window7_224_22k.pth | [swin_base_patch4_window7_224_22k.pth](https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window7_224_22k.pth) | +| pretrain/swin_base_patch4_window12_384.pth | [swin_base_patch4_window12_384.pth](https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384.pth) | +| pretrain/swin_base_patch4_window12_384_22k.pth | [swin_base_patch4_window12_384_22k.pth](https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384_22k.pth) | + +## Results and models + +### ADE20K + +| Method | Backbone | Crop Size | pretrain | pretrain img size | Batch Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------- | -------- | --------- | ------------ | ----------------- | ---------- | ------- | -------- | -------------- | ----- | ------------: | ------ | 
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UPerNet | Swin-T | 512x512 | ImageNet-1K | 224x224 | 16 | 160000 | 5.02 | 21.06 | 44.41 | 45.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210531_112542-e380ad3e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210531_112542.log.json) | +| UPerNet | Swin-S | 512x512 | ImageNet-1K | 224x224 | 16 | 160000 | 6.17 | 14.72 | 47.72 | 49.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192015-ee2fff1c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192015.log.json) | +| UPerNet | Swin-B | 512x512 | ImageNet-1K | 224x224 | 16 | 160000 | 7.61 | 12.65 | 47.99 | 49.57 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192340-593b0e13.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192340.log.json) | +| UPerNet | Swin-B | 512x512 | ImageNet-22K | 224x224 | 16 | 160000 | - | - | 50.31 | 51.9 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K_20210526_211650-762e2178.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K_20210526_211650.log.json) | +| UPerNet | Swin-B | 512x512 | ImageNet-1K | 384x384 | 16 | 160000 | 8.52 | 12.10 | 48.35 | 49.65 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K_20210531_132020-05b22ea4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K_20210531_132020.log.json) | +| UPerNet | Swin-B | 512x512 | ImageNet-22K | 384x384 | 16 | 160000 | - | - | 50.76 | 52.4 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K_20210531_125459-429057bf.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K_20210531_125459.log.json) | diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/swin/swin.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/swin/swin.yml new file mode 100644 index 0000000..ef21d21 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/swin/swin.yml @@ -0,0 +1,117 @@ +Models: +- Name: upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K + In Collection: UPerNet + Metadata: + backbone: Swin-T + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 47.48 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 5.02 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 44.41 + mIoU(ms+flip): 45.79 + Config: configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210531_112542-e380ad3e.pth +- Name: upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K + In Collection: UPerNet + Metadata: + backbone: Swin-S + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 67.93 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 6.17 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 47.72 + mIoU(ms+flip): 49.24 + Config: configs/swin/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192015-ee2fff1c.pth +- Name: upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K + In Collection: UPerNet + Metadata: + backbone: Swin-B + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 79.05 + hardware: V100 + backend: PyTorch + batch size: 1 
+ mode: FP32 + resolution: (512,512) + Training Memory (GB): 7.61 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 47.99 + mIoU(ms+flip): 49.57 + Config: configs/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K_20210526_192340-593b0e13.pth +- Name: upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K + In Collection: UPerNet + Metadata: + backbone: Swin-B + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 50.31 + mIoU(ms+flip): 51.9 + Config: configs/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K_20210526_211650-762e2178.pth +- Name: upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K + In Collection: UPerNet + Metadata: + backbone: Swin-B + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 82.64 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 8.52 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 48.35 + mIoU(ms+flip): 49.65 + Config: configs/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K_20210531_132020-05b22ea4.pth +- Name: upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K + In Collection: UPerNet + Metadata: + backbone: Swin-B + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 50.76 + mIoU(ms+flip): 52.4 + Config: configs/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K_20210531_125459-429057bf.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K.py new file mode 100644 index 0000000..027bd6f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_1K.py @@ -0,0 +1,15 @@ +_base_ = [ + 'upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_' + 'pretrain_224x224_1K.py' +] +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window12_384_20220317-55b0104a.pth' # noqa +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), + pretrain_img_size=384, + embed_dims=128, + depths=[2, 2, 18, 2], + num_heads=[4, 8, 16, 32], + window_size=12), + 
decode_head=dict(in_channels=[128, 256, 512, 1024], num_classes=150), + auxiliary_head=dict(in_channels=512, num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K.py new file mode 100644 index 0000000..e662d4f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/swin/upernet_swin_base_patch4_window12_512x512_160k_ade20k_pretrain_384x384_22K.py @@ -0,0 +1,8 @@ +_base_ = [ + './upernet_swin_base_patch4_window12_512x512_160k_ade20k_' + 'pretrain_384x384_1K.py' +] +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window12_384_22k_20220317-e5c09f74.pth' # noqa +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py new file mode 100644 index 0000000..6e05677 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py @@ -0,0 +1,13 @@ +_base_ = [ + './upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_' + 'pretrain_224x224_1K.py' +] +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window7_224_20220317-e9b98025.pth' # noqa +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), + embed_dims=128, + depths=[2, 2, 18, 2], + num_heads=[4, 8, 16, 32]), + decode_head=dict(in_channels=[128, 256, 512, 1024], num_classes=150), + auxiliary_head=dict(in_channels=512, num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K.py new file mode 100644 index 0000000..7a9c506 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/swin/upernet_swin_base_patch4_window7_512x512_160k_ade20k_pretrain_224x224_22K.py @@ -0,0 +1,8 @@ +_base_ = [ + './upernet_swin_base_patch4_window7_512x512_160k_ade20k_' + 'pretrain_224x224_1K.py' +] +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window7_224_22k_20220317-4f79f7c0.pth' # noqa +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/swin/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/swin/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py new file mode 100644 index 0000000..1958e0e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/swin/upernet_swin_small_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py @@ -0,0 +1,11 @@ +_base_ = [ + './upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_' + 'pretrain_224x224_1K.py' +] +checkpoint_file = 
'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_small_patch4_window7_224_20220317-7ba6d6dd.pth' # noqa +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), + depths=[2, 2, 18, 2]), + decode_head=dict(in_channels=[96, 192, 384, 768], num_classes=150), + auxiliary_head=dict(in_channels=384, num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py new file mode 100644 index 0000000..6d8c413 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/swin/upernet_swin_tiny_patch4_window7_512x512_160k_ade20k_pretrain_224x224_1K.py @@ -0,0 +1,45 @@ +_base_ = [ + '../_base_/models/upernet_swin.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_tiny_patch4_window7_224_20220317-1cdeb081.pth' # noqa +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file), + embed_dims=96, + depths=[2, 2, 6, 2], + num_heads=[3, 6, 12, 24], + window_size=7, + use_abs_pos_embed=False, + drop_path_rate=0.3, + patch_norm=True), + decode_head=dict(in_channels=[96, 192, 384, 768], num_classes=150), + auxiliary_head=dict(in_channels=384, num_classes=150)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'absolute_pos_embed': dict(decay_mult=0.), + 'relative_position_bias_table': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) + })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/README.md new file mode 100644 index 0000000..639d074 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/README.md @@ -0,0 +1,76 @@ +# Twins + +[Twins: Revisiting the Design of Spatial Attention in Vision Transformers](https://arxiv.org/pdf/2104.13840.pdf) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +Very recently, a variety of vision transformer architectures for dense prediction tasks have been proposed and they show that the design of spatial attention is critical to their success in these tasks. In this work, we revisit the design of the spatial attention and demonstrate that a carefully-devised yet simple spatial attention mechanism performs favourably against the state-of-the-art schemes. As a result, we propose two vision transformer architectures, namely, Twins-PCPVT and Twins-SVT. Our proposed architectures are highly-efficient and easy to implement, only involving matrix multiplications that are highly optimized in modern deep learning frameworks. 
More importantly, the proposed architectures achieve excellent performance on a wide range of visual tasks, including image level classification as well as dense detection and segmentation. The simplicity and strong performance suggest that our proposed architectures may serve as stronger backbones for many vision tasks. Our code is released at [this https URL](https://github.com/Meituan-AutoML/Twins). + + + +
+ +## Citation + +```bibtex +@article{chu2021twins, + title={Twins: Revisiting spatial attention design in vision transformers}, + author={Chu, Xiangxiang and Tian, Zhi and Wang, Yuqing and Zhang, Bo and Ren, Haibing and Wei, Xiaolin and Xia, Huaxia and Shen, Chunhua}, + journal={arXiv preprint arXiv:2104.13840}, + year={2021} +} +``` + +## Usage + +We have provided pretrained models converted from the [official repo](https://github.com/Meituan-AutoML/Twins). + +If you want to convert the keys on your own to use the official repository's pre-trained models, we also provide a script [`twins2mmseg.py`](../../tools/model_converters/twins2mmseg.py) in the tools directory to convert the keys of models from [the official repo](https://github.com/Meituan-AutoML/Twins) to MMSegmentation style. + +```shell +python tools/model_converters/twins2mmseg.py ${PRETRAIN_PATH} ${STORE_PATH} ${MODEL_TYPE} +``` + +This script converts a `pcpvt` or `svt` pretrained model from `PRETRAIN_PATH` and stores the converted model in `STORE_PATH`. + +For example, + +```shell +python tools/model_converters/twins2mmseg.py ./alt_gvt_base.pth ./pretrained/alt_gvt_base.pth svt +``` + +## Results and models + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------------------- | -------- | --------- | ------- | -------- | -------------- | ----- | ------------- | ------------------------------------------------------------------------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Twins-FPN | PCPVT-S | 512x512 | 80000 | 6.60 | 27.15 | 43.26 | 44.11 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/twins/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_204132-41acd132.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_204132.log.json) | +| Twins-UPerNet | PCPVT-S | 512x512 | 160000 | 9.67 | 14.24 | 46.04 | 46.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k_20211201_233537-8e99c07a.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k_20211201_233537.log.json) | +| Twins-FPN | PCPVT-B | 512x512 | 80000 | 8.41 | 19.67 | 45.66 | 46.48 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/twins/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141019-d396db72.pth) \|
[log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141019.log.json) | +| Twins-UPerNet (8x2) | PCPVT-B | 512x512 | 160000 | 6.46 | 12.04 | 47.91 | 48.64 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/twins/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k_20211130_141020-02094ea5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k_20211130_141020.log.json) | +| Twins-FPN | PCPVT-L | 512x512 | 80000 | 10.78 | 14.32 | 45.94 | 46.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/twins/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_105226-bc6d61dc.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_105226.log.json) | +| Twins-UPerNet (8x2) | PCPVT-L | 512x512 | 160000 | 7.82 | 10.70 | 49.35 | 50.08 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/twins/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k_20211201_075053-c6095c07.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k_20211201_075053.log.json) | +| Twins-FPN | SVT-S | 512x512 | 80000 | 5.80 | 29.79 | 44.47 | 45.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/twins/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141006-0a0d3317.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141006.log.json) | +| Twins-UPerNet (8x2) | SVT-S | 512x512 | 160000 | 4.93 | 15.09 | 46.08 | 46.96 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k/twins_svt-s_uperhead_8x2_512x512_160k_ade20k_20211130_141005-e48a2d94.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k/twins_svt-s_uperhead_8x2_512x512_160k_ade20k_20211130_141005.log.json) | +| Twins-FPN | SVT-B | 512x512 | 80000 | 8.75 | 21.10 | 46.77 | 47.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/twins/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_113849-88b2907c.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_113849.log.json) | +| Twins-UPerNet (8x2) | SVT-B | 512x512 | 160000 | 6.77 | 12.66 | 48.04 | 48.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/twins/twins_svt-b_uperhead_8x2_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-b_uperhead_8x2_512x512_160k_ade20k/twins_svt-b_uperhead_8x2_512x512_160k_ade20k_20211202_040826-0943a1f1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-b_uperhead_8x2_512x512_160k_ade20k/twins_svt-b_uperhead_8x2_512x512_160k_ade20k_20211202_040826.log.json) | +| Twins-FPN | SVT-L | 512x512 | 80000 | 11.20 | 17.80 | 46.55 | 47.74 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/twins/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141005-1d59bee2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141005.log.json) | +| Twins-UPerNet (8x2) | SVT-L | 512x512 | 160000 | 8.41 | 10.73 | 49.65 | 50.63 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/twins/twins_svt-l_uperhead_8x2_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-l_uperhead_8x2_512x512_160k_ade20k/twins_svt-l_uperhead_8x2_512x512_160k_ade20k_20211130_141005-3e2cae61.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-l_uperhead_8x2_512x512_160k_ade20k/twins_svt-l_uperhead_8x2_512x512_160k_ade20k_20211130_141005.log.json) | + +Note: + +- `8x2` means 8 GPUs with 2 samples per GPU in training; the default setting for Twins on ADE20K is 8 GPUs with 4 samples per GPU (see the config fragment below). +- `UPerNet` and `FPN` are the decoder heads used in the corresponding Twins models, namely `UPerHead` and `FPNHead`, respectively. Note that models in the [official repo](https://github.com/Meituan-AutoML/Twins) all use `UPerHead`.
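The `8x2` naming in the note above corresponds to a one-line batch override in the configs. As a minimal illustration, the fragment below is the override that appears in the `8x2` config files added in this diff, shown here only to make the naming concrete:

```python
# Twins default on ADE20K: 8 GPUs x 4 samples per GPU (global batch 32).
# The `8x2` variants halve the per-GPU batch: 8 GPUs x 2 = global batch 16.
data = dict(samples_per_gpu=2, workers_per_gpu=2)
```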
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins.yml new file mode 100644 index 0000000..6b5f5c1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins.yml @@ -0,0 +1,265 @@ +Models: +- Name: twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k + In Collection: FPN + Metadata: + backbone: PCPVT-S + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 36.83 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 6.6 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.26 + mIoU(ms+flip): 44.11 + Config: configs/twins/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_204132-41acd132.pth +- Name: twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k + In Collection: UPerNet + Metadata: + backbone: PCPVT-S + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 70.22 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.67 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 46.04 + mIoU(ms+flip): 46.92 + Config: configs/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k_20211201_233537-8e99c07a.pth +- Name: twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k + In Collection: FPN + Metadata: + backbone: PCPVT-B + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 50.84 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 8.41 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.66 + mIoU(ms+flip): 46.48 + Config: configs/twins/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141019-d396db72.pth +- Name: twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k + In Collection: UPerNet + Metadata: + backbone: PCPVT-B + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 83.06 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 6.46 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 47.91 + mIoU(ms+flip): 48.64 + Config: configs/twins/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k_20211130_141020-02094ea5.pth +- Name: twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k + In Collection: FPN + Metadata: + backbone: PCPVT-L + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 69.83 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 10.78 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.94 + mIoU(ms+flip): 46.7 + Config: 
configs/twins/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_105226-bc6d61dc.pth +- Name: twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k + In Collection: UPerNet + Metadata: + backbone: PCPVT-L + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 93.46 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 7.82 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 49.35 + mIoU(ms+flip): 50.08 + Config: configs/twins/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k_20211201_075053-c6095c07.pth +- Name: twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k + In Collection: FPN + Metadata: + backbone: SVT-S + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 33.57 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 5.8 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 44.47 + mIoU(ms+flip): 45.42 + Config: configs/twins/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141006-0a0d3317.pth +- Name: twins_svt-s_uperhead_8x2_512x512_160k_ade20k + In Collection: UPerNet + Metadata: + backbone: SVT-S + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 66.27 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 4.93 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 46.08 + mIoU(ms+flip): 46.96 + Config: configs/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k/twins_svt-s_uperhead_8x2_512x512_160k_ade20k_20211130_141005-e48a2d94.pth +- Name: twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k + In Collection: FPN + Metadata: + backbone: SVT-B + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 47.39 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 8.75 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 46.77 + mIoU(ms+flip): 47.47 + Config: configs/twins/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k_20211201_113849-88b2907c.pth +- Name: twins_svt-b_uperhead_8x2_512x512_160k_ade20k + In Collection: UPerNet + Metadata: + backbone: SVT-B + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 78.99 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 6.77 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 48.04 + mIoU(ms+flip): 48.87 + Config: configs/twins/twins_svt-b_uperhead_8x2_512x512_160k_ade20k.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-b_uperhead_8x2_512x512_160k_ade20k/twins_svt-b_uperhead_8x2_512x512_160k_ade20k_20211202_040826-0943a1f1.pth +- Name: twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k + In Collection: FPN + Metadata: + backbone: SVT-L + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 56.18 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 11.2 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 46.55 + mIoU(ms+flip): 47.74 + Config: configs/twins/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k_20211130_141005-1d59bee2.pth +- Name: twins_svt-l_uperhead_8x2_512x512_160k_ade20k + In Collection: UPerNet + Metadata: + backbone: SVT-L + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 93.2 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 8.41 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 49.65 + mIoU(ms+flip): 50.63 + Config: configs/twins/twins_svt-l_uperhead_8x2_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/twins/twins_svt-l_uperhead_8x2_512x512_160k_ade20k/twins_svt-l_uperhead_8x2_512x512_160k_ade20k_20211130_141005-3e2cae61.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k.py new file mode 100644 index 0000000..b79fefd --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_pcpvt-b_fpn_fpnhead_8x4_512x512_80k_ade20k.py @@ -0,0 +1,8 @@ +_base_ = ['./twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_base_20220308-0621964c.pth' # noqa + +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + depths=[3, 4, 18, 3]), ) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k.py new file mode 100644 index 0000000..8c299d3 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_pcpvt-b_uperhead_8x2_512x512_160k_ade20k.py @@ -0,0 +1,11 @@ +_base_ = ['./twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_base_20220308-0621964c.pth' # noqa + +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + depths=[3, 4, 18, 3], + drop_path_rate=0.3)) + +data = dict(samples_per_gpu=2, workers_per_gpu=2) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k.py new file mode 100644 index 0000000..abb652e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_pcpvt-l_fpn_fpnhead_8x4_512x512_80k_ade20k.py @@ -0,0 +1,8 @@ +_base_ = 
['./twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_large_20220308-37579dc6.pth' # noqa + +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + depths=[3, 8, 27, 3])) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k.py new file mode 100644 index 0000000..f6f7d27 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_pcpvt-l_uperhead_8x2_512x512_160k_ade20k.py @@ -0,0 +1,11 @@ +_base_ = ['./twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/pcpvt_large_20220308-37579dc6.pth' # noqa + +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + depths=[3, 8, 27, 3], + drop_path_rate=0.3)) + +data = dict(samples_per_gpu=2, workers_per_gpu=2) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py new file mode 100644 index 0000000..3d7be96 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_pcpvt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/twins_pcpvt-s_fpn.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] + +optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0001) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k.py new file mode 100644 index 0000000..c888b92 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_pcpvt-s_uperhead_8x4_512x512_160k_ade20k.py @@ -0,0 +1,26 @@ +_base_ = [ + '../_base_/models/twins_pcpvt-s_upernet.py', + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] + +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict(custom_keys={ + 'pos_block': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k.py new file mode 100644 index 0000000..00d8957 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_svt-b_fpn_fpnhead_8x4_512x512_80k_ade20k.py @@ -0,0 +1,12 @@ +_base_ = ['./twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/alt_gvt_base_20220308-1b7eb711.pth' # noqa + +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + embed_dims=[96, 192, 384, 768], + num_heads=[3, 6, 12, 24], + depths=[2, 2, 18, 2]), + neck=dict(in_channels=[96, 192, 384, 768]), +) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_svt-b_uperhead_8x2_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_svt-b_uperhead_8x2_512x512_160k_ade20k.py new file mode 100644 index 0000000..a969fed --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_svt-b_uperhead_8x2_512x512_160k_ade20k.py @@ -0,0 +1,12 @@ +_base_ = ['./twins_svt-s_uperhead_8x2_512x512_160k_ade20k.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/alt_gvt_base_20220308-1b7eb711.pth' # noqa + +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + embed_dims=[96, 192, 384, 768], + num_heads=[3, 6, 12, 24], + depths=[2, 2, 18, 2]), + decode_head=dict(in_channels=[96, 192, 384, 768]), + auxiliary_head=dict(in_channels=384)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k.py new file mode 100644 index 0000000..c68bfd4 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_svt-l_fpn_fpnhead_8x4_512x512_80k_ade20k.py @@ -0,0 +1,13 @@ +_base_ = ['./twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/alt_gvt_large_20220308-fb5936f3.pth' # noqa + +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + embed_dims=[128, 256, 512, 1024], + num_heads=[4, 8, 16, 32], + depths=[2, 2, 18, 2], + drop_path_rate=0.3), + neck=dict(in_channels=[128, 256, 512, 1024]), +) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_svt-l_uperhead_8x2_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_svt-l_uperhead_8x2_512x512_160k_ade20k.py new file mode 100644 index 0000000..f98c070 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_svt-l_uperhead_8x2_512x512_160k_ade20k.py @@ -0,0 +1,13 @@ +_base_ = ['./twins_svt-s_uperhead_8x2_512x512_160k_ade20k.py'] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/alt_gvt_large_20220308-fb5936f3.pth' # noqa + +model = dict( + backbone=dict( + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + embed_dims=[128, 256, 512, 1024], + num_heads=[4, 8, 16, 32], + 
depths=[2, 2, 18, 2], + drop_path_rate=0.3), + decode_head=dict(in_channels=[128, 256, 512, 1024]), + auxiliary_head=dict(in_channels=512)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py new file mode 100644 index 0000000..dbb944c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_svt-s_fpn_fpnhead_8x4_512x512_80k_ade20k.py @@ -0,0 +1,22 @@ +_base_ = [ + '../_base_/models/twins_pcpvt-s_fpn.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/alt_gvt_small_20220308-7e1c3695.pth' # noqa + +model = dict( + backbone=dict( + type='SVT', + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + embed_dims=[64, 128, 256, 512], + num_heads=[2, 4, 8, 16], + mlp_ratios=[4, 4, 4, 4], + depths=[2, 2, 10, 4], + windiow_sizes=[7, 7, 7, 7], + norm_after_stage=True), + neck=dict(in_channels=[64, 128, 256, 512], out_channels=256, num_outs=4), + decode_head=dict(num_classes=150), +) + +optimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0001) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k.py new file mode 100644 index 0000000..44bf60b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/twins/twins_svt-s_uperhead_8x2_512x512_160k_ade20k.py @@ -0,0 +1,43 @@ +_base_ = [ + '../_base_/models/twins_pcpvt-s_upernet.py', + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] + +checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/twins/alt_gvt_small_20220308-7e1c3695.pth' # noqa + +model = dict( + backbone=dict( + type='SVT', + init_cfg=dict(type='Pretrained', checkpoint=checkpoint), + embed_dims=[64, 128, 256, 512], + num_heads=[2, 4, 8, 16], + mlp_ratios=[4, 4, 4, 4], + depths=[2, 2, 10, 4], + windiow_sizes=[7, 7, 7, 7], + norm_after_stage=True), + decode_head=dict(in_channels=[64, 128, 256, 512]), + auxiliary_head=dict(in_channels=256)) + +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict(custom_keys={ + 'pos_block': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) + })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +data = dict(samples_per_gpu=2, workers_per_gpu=2) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/README.md new file mode 100644 index 0000000..f17e174 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/README.md @@ -0,0 +1,92 @@ +# UNet + +[U-Net: Convolutional Networks for Biomedical Image Segmentation](https://arxiv.org/abs/1505.04597) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +There is large consent that successful training of deep networks requires many thousand annotated training samples. 
In this paper, we present a network and training strategy that relies on the strong use of data augmentation to use the available annotated samples more efficiently. The architecture consists of a contracting path to capture context and a symmetric expanding path that enables precise localization. We show that such a network can be trained end-to-end from very few images and outperforms the prior best method (a sliding-window convolutional network) on the ISBI challenge for segmentation of neuronal structures in electron microscopic stacks. Using the same network trained on transmitted light microscopy images (phase contrast and DIC) we won the ISBI cell tracking challenge 2015 in these categories by a large margin. Moreover, the network is fast. Segmentation of a 512x512 image takes less than a second on a recent GPU. The full implementation (based on Caffe) and the trained networks are available at [this http URL](https://lmb.informatik.uni-freiburg.de/people/ronneber/u-net/). + + + +
+ +## Citation + +```bibtex +@inproceedings{ronneberger2015u, + title={U-net: Convolutional networks for biomedical image segmentation}, + author={Ronneberger, Olaf and Fischer, Philipp and Brox, Thomas}, + booktitle={International Conference on Medical image computing and computer-assisted intervention}, + pages={234--241}, + year={2015}, + organization={Springer} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Loss | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ---------- | ----------- | ------------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UNet + FCN | UNet-S5-D16 | Cross Entropy | 512x1024 | 160000 | 17.91 | 3.05 | 69.10 | 71.05 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes_20211210_145204-6860854e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes_20211210_145204.log.json) | + +### DRIVE + +| Method | Backbone | Loss | Image Size | Crop Size | Stride | Lr schd | Mem (GB) | Inf time (fps) | mDice | Dice | config | download | +| ---------------- | ----------- | -------------------- | ---------- | --------- | -----: | ------- | -------- | -------------: | ----: | ----: | ---------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UNet + FCN | UNet-S5-D16 | Cross Entropy | 584x565 | 64x64 | 42x42 | 40000 | 0.680 | - | 88.38 | 78.67 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/fcn_unet_s5-d16_64x64_40k_drive.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_64x64_40k_drive/fcn_unet_s5-d16_64x64_40k_drive_20201223_191051-5daf6d3b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/unet_s5-d16_64x64_40k_drive/unet_s5-d16_64x64_40k_drive-20201223_191051.log.json) | +| UNet + FCN | UNet-S5-D16 | Cross Entropy + Dice | 584x565 | 64x64 | 42x42 | 40000 | 0.582 | - | 88.71 | 79.32 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201820-785de5c2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201820.log.json) | +| UNet + PSPNet | UNet-S5-D16 | Cross Entropy | 584x565 | 64x64 | 42x42 | 40000 | 0.599 | - | 88.35 | 78.62 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/pspnet_unet_s5-d16_64x64_40k_drive.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_64x64_40k_drive/pspnet_unet_s5-d16_64x64_40k_drive_20201227_181818-aac73387.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_64x64_40k_drive/pspnet_unet_s5-d16_64x64_40k_drive-20201227_181818.log.json) | +| UNet + PSPNet | UNet-S5-D16 | Cross Entropy + Dice | 584x565 | 64x64 | 42x42 | 40000 | 0.585 | - | 88.76 | 79.42 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201821-22b3e3ba.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201821.log.json) | +| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy | 584x565 | 64x64 | 42x42 | 40000 | 0.596 | - | 88.38 | 78.69 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/deeplabv3_unet_s5-d16_64x64_40k_drive.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_64x64_40k_drive/deeplabv3_unet_s5-d16_64x64_40k_drive_20201226_094047-0671ff20.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_64x64_40k_drive/deeplabv3_unet_s5-d16_64x64_40k_drive-20201226_094047.log.json) | +| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy + Dice | 584x565 | 64x64 | 42x42 | 40000 | 0.582 | - | 88.84 | 79.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201825-6bf0efd7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201825.log.json) | + +### STARE + +| Method | Backbone | Loss | Image Size | Crop Size | Stride | Lr schd | Mem (GB) | Inf time (fps) | mDice | Dice | config | download | +| ---------------- | ----------- | -------------------- | ---------- | --------- | -----: | ------- | -------- | -------------: | ----: | ----: | ------------------------------------------------------------------------------------------------------------------------------------------ | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| UNet + FCN | UNet-S5-D16 | Cross Entropy | 605x700 | 128x128 | 85x85 | 40000 | 0.968 | - | 89.78 | 81.02 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/fcn_unet_s5-d16_128x128_40k_stare.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_128x128_40k_stare/fcn_unet_s5-d16_128x128_40k_stare_20201223_191051-7d77e78b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/unet_s5-d16_128x128_40k_stare/unet_s5-d16_128x128_40k_stare-20201223_191051.log.json) | +| UNet + FCN | UNet-S5-D16 | Cross Entropy + Dice | 605x700 | 128x128 | 85x85 | 40000 | 0.986 | - | 90.65 | 82.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201821-f75705a9.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201821.log.json) | +| UNet + PSPNet | UNet-S5-D16 | Cross Entropy | 605x700 | 128x128 | 85x85 | 40000 | 0.982 | - | 89.89 | 81.22 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/pspnet_unet_s5-d16_128x128_40k_stare.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_stare/pspnet_unet_s5-d16_128x128_40k_stare_20201227_181818-3c2923c4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_stare/pspnet_unet_s5-d16_128x128_40k_stare-20201227_181818.log.json) | +| UNet + PSPNet | UNet-S5-D16 | Cross Entropy + Dice | 605x700 | 128x128 | 85x85 | 40000 | 1.028 | - | 90.72 | 82.84 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201823-f1063ef7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201823.log.json) | +| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy | 605x700 | 128x128 | 85x85 | 40000 | 0.999 | - | 89.73 | 80.93 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_stare.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_stare/deeplabv3_unet_s5-d16_128x128_40k_stare_20201226_094047-93dcb93c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_stare/deeplabv3_unet_s5-d16_128x128_40k_stare-20201226_094047.log.json) | +| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy + Dice | 605x700 | 128x128 | 85x85 | 40000 | 1.010 | - | 90.65 | 82.71 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201825-21db614c.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201825.log.json) | + +### CHASE_DB1 + +| Method | Backbone | Loss | Image Size | Crop Size | Stride | Lr schd | Mem (GB) | Inf time (fps) | mDice | Dice | config | download | +| ---------------- | ----------- | -------------------- | ---------- | --------- | -----: | ------- | -------- | -------------: | ----: | ----: | ---------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UNet + FCN | UNet-S5-D16 | Cross Entropy | 960x999 | 128x128 | 85x85 | 40000 | 0.968 | - | 89.46 | 80.24 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/fcn_unet_s5-d16_128x128_40k_chase_db1.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_128x128_40k_chase_db1/fcn_unet_s5-d16_128x128_40k_chase_db1_20201223_191051-11543527.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/unet_s5-d16_128x128_40k_chase_db1/unet_s5-d16_128x128_40k_chase_db1-20201223_191051.log.json) | +| UNet + FCN | UNet-S5-D16 | Cross Entropy + Dice | 960x999 | 128x128 | 85x85 | 40000 | 0.986 | - | 89.52 | 80.40 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201821-1c4eb7cf.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201821.log.json) | +| UNet + PSPNet | UNet-S5-D16 | Cross Entropy | 960x999 | 128x128 | 85x85 | 40000 | 0.982 | - | 89.52 | 80.36 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1/pspnet_unet_s5-d16_128x128_40k_chase_db1_20201227_181818-68d4e609.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1/pspnet_unet_s5-d16_128x128_40k_chase_db1-20201227_181818.log.json) | +| UNet + PSPNet | UNet-S5-D16 | Cross Entropy + Dice | 960x999 | 128x128 | 85x85 | 40000 | 1.028 | - | 89.45 | 80.28 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1.py) | 
[model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201823-c0802c4d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201823.log.json) | +| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy | 960x999 | 128x128 | 85x85 | 40000 | 0.999 | - | 89.57 | 80.47 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1/deeplabv3_unet_s5-d16_128x128_40k_chase_db1_20201226_094047-4c5aefa3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1/deeplabv3_unet_s5-d16_128x128_40k_chase_db1-20201226_094047.log.json) | +| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy + Dice | 960x999 | 128x128 | 85x85 | 40000 | 1.010 | - | 89.49 | 80.37 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201825-4ef29df5.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201825.log.json) | + +### HRF + +| Method | Backbone | Loss | Image Size | Crop Size | Stride | Lr schd | Mem (GB) | Inf time (fps) | mDice | Dice | config | download | +| ---------------- | ----------- | -------------------- | ---------- | --------- | ------: | ------- | -------- | -------------: | ----: | ----: | ---------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UNet + FCN | UNet-S5-D16 | Cross Entropy | 2336x3504 | 256x256 | 170x170 | 40000 | 2.525 | - | 88.92 | 79.45 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/fcn_unet_s5-d16_256x256_40k_hrf.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_256x256_40k_hrf/fcn_unet_s5-d16_256x256_40k_hrf_20201223_173724-d89cf1ed.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/unet_s5-d16_256x256_40k_hrf/unet_s5-d16_256x256_40k_hrf-20201223_173724.log.json) | +| UNet + FCN | UNet-S5-D16 | Cross Entropy + Dice | 2336x3504 | 256x256 | 170x170 | 40000 | 2.623 | - | 89.64 | 80.87 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_201821-c314da8a.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_201821.log.json) | +| UNet + PSPNet | UNet-S5-D16 | Cross Entropy | 2336x3504 | 256x256 | 170x170 | 40000 | 2.588 | - | 89.24 | 80.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/pspnet_unet_s5-d16_256x256_40k_hrf.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_256x256_40k_hrf/pspnet_unet_s5-d16_256x256_40k_hrf_20201227_181818-fdb7e29b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_256x256_40k_hrf/pspnet_unet_s5-d16_256x256_40k_hrf-20201227_181818.log.json) | +| UNet + PSPNet | UNet-S5-D16 | Cross Entropy + Dice | 2336x3504 | 256x256 | 170x170 | 40000 | 2.798 | - | 89.69 | 80.96 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_201823-53d492fa.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_201823.log.json) | +| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy | 2336x3504 | 256x256 | 170x170 | 40000 | 2.604 | - | 89.32 | 80.21 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf/deeplabv3_unet_s5-d16_256x256_40k_hrf_20201226_094047-3a1fdf85.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf/deeplabv3_unet_s5-d16_256x256_40k_hrf-20201226_094047.log.json) | +| UNet + DeepLabV3 | UNet-S5-D16 | Cross Entropy + Dice | 2336x3504 | 256x256 | 170x170 | 40000 | 2.607 | - | 89.56 | 80.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_202032-59daf7a4.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_202032.log.json) | + +Note: + +- In the `DRIVE`, `STARE`, `CHASE_DB1`, and `HRF` datasets, `mDice` is the mean Dice over the background and vessel classes, while `Dice` is the Dice of the vessel (foreground) class only (see the sketch below).
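To make the `Dice` vs `mDice` distinction in the note concrete, here is a minimal NumPy sketch of the two quantities on a binary vessel mask; the `dice` helper and the toy masks are illustrative assumptions, not the evaluation code mmsegmentation itself runs:

```python
import numpy as np

def dice(pred: np.ndarray, gt: np.ndarray) -> float:
    """Dice coefficient for one binary class: 2*|pred & gt| / (|pred| + |gt|)."""
    inter = np.logical_and(pred, gt).sum()
    return 2.0 * inter / (pred.sum() + gt.sum())

# Toy 4x4 masks: True = vessel (foreground), False = background.
gt = np.array([[0, 0, 1, 1], [0, 1, 1, 0], [0, 1, 0, 0], [1, 1, 0, 0]], dtype=bool)
pr = np.array([[0, 0, 1, 1], [0, 1, 0, 0], [0, 1, 0, 0], [1, 0, 0, 0]], dtype=bool)

dice_vessel = dice(pr, gt)            # the `Dice` column: vessel class only
dice_background = dice(~pr, ~gt)      # background scored as its own binary class
m_dice = (dice_vessel + dice_background) / 2  # the `mDice` column: mean of the two
print(f"Dice={dice_vessel:.4f}, mDice={m_dice:.4f}")  # Dice=0.8333, mDice=0.8667
```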
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1.py new file mode 100644 index 0000000..c706cf3 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/deeplabv3_unet_s5-d16.py', + '../_base_/datasets/chase_db1.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) +evaluation = dict(metric='mDice') diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_stare.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_stare.py new file mode 100644 index 0000000..0ef02dc --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/deeplabv3_unet_s5-d16_128x128_40k_stare.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/deeplabv3_unet_s5-d16.py', '../_base_/datasets/stare.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) +evaluation = dict(metric='mDice') diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf.py new file mode 100644 index 0000000..118428b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/deeplabv3_unet_s5-d16.py', '../_base_/datasets/hrf.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(256, 256), stride=(170, 170))) +evaluation = dict(metric='mDice') diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/deeplabv3_unet_s5-d16_64x64_40k_drive.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/deeplabv3_unet_s5-d16_64x64_40k_drive.py new file mode 100644 index 0000000..1f8862a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/deeplabv3_unet_s5-d16_64x64_40k_drive.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/deeplabv3_unet_s5-d16.py', '../_base_/datasets/drive.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(64, 64), stride=(42, 42))) +evaluation = dict(metric='mDice') diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1.py new file mode 100644 index 0000000..1c48cbc --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1.py @@ -0,0 +1,6 @@ +_base_ = './deeplabv3_unet_s5-d16_128x128_40k_chase_db1.py' +model = dict( + decode_head=dict(loss_decode=[ + dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0) + ])) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare.py 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare.py new file mode 100644 index 0000000..1022ede --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare.py @@ -0,0 +1,6 @@ +_base_ = './deeplabv3_unet_s5-d16_128x128_40k_stare.py' +model = dict( + decode_head=dict(loss_decode=[ + dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0) + ])) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf.py new file mode 100644 index 0000000..fc17da7 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf.py @@ -0,0 +1,6 @@ +_base_ = './deeplabv3_unet_s5-d16_256x256_40k_hrf.py' +model = dict( + decode_head=dict(loss_decode=[ + dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0) + ])) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive.py new file mode 100644 index 0000000..3f1f12e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive.py @@ -0,0 +1,6 @@ +_base_ = './deeplabv3_unet_s5-d16_64x64_40k_drive.py' +model = dict( + decode_head=dict(loss_decode=[ + dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0) + ])) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/fcn_unet_s5-d16_128x128_40k_chase_db1.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/fcn_unet_s5-d16_128x128_40k_chase_db1.py new file mode 100644 index 0000000..2bc52d9 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/fcn_unet_s5-d16_128x128_40k_chase_db1.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/fcn_unet_s5-d16.py', '../_base_/datasets/chase_db1.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) +evaluation = dict(metric='mDice') diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/fcn_unet_s5-d16_128x128_40k_stare.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/fcn_unet_s5-d16_128x128_40k_stare.py new file mode 100644 index 0000000..5d836c6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/fcn_unet_s5-d16_128x128_40k_stare.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/fcn_unet_s5-d16.py', '../_base_/datasets/stare.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) +evaluation = dict(metric='mDice') diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/fcn_unet_s5-d16_256x256_40k_hrf.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/fcn_unet_s5-d16_256x256_40k_hrf.py new file mode 100644 index 0000000..be8eec7 --- /dev/null +++ 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/fcn_unet_s5-d16_256x256_40k_hrf.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/fcn_unet_s5-d16.py', '../_base_/datasets/hrf.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(256, 256), stride=(170, 170))) +evaluation = dict(metric='mDice') diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py new file mode 100644 index 0000000..a2f7dbe --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py @@ -0,0 +1,16 @@ +_base_ = [ + '../_base_/models/fcn_unet_s5-d16.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] + +model = dict( + decode_head=dict(num_classes=19), + auxiliary_head=dict(num_classes=19), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, +) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/fcn_unet_s5-d16_64x64_40k_drive.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/fcn_unet_s5-d16_64x64_40k_drive.py new file mode 100644 index 0000000..80483ad --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/fcn_unet_s5-d16_64x64_40k_drive.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/fcn_unet_s5-d16.py', '../_base_/datasets/drive.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(64, 64), stride=(42, 42))) +evaluation = dict(metric='mDice') diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1.py new file mode 100644 index 0000000..5264866 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1.py @@ -0,0 +1,6 @@ +_base_ = './fcn_unet_s5-d16_128x128_40k_chase_db1.py' +model = dict( + decode_head=dict(loss_decode=[ + dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0) + ])) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare.py new file mode 100644 index 0000000..cf5fa1f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare.py @@ -0,0 +1,6 @@ +_base_ = './fcn_unet_s5-d16_128x128_40k_stare.py' +model = dict( + decode_head=dict(loss_decode=[ + dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0) + ])) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf.py new file mode 100644 index 0000000..a154d7e --- /dev/null +++ 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf.py @@ -0,0 +1,6 @@ +_base_ = './fcn_unet_s5-d16_256x256_40k_hrf.py' +model = dict( + decode_head=dict(loss_decode=[ + dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0) + ])) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive.py new file mode 100644 index 0000000..1b8f860 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive.py @@ -0,0 +1,6 @@ +_base_ = './fcn_unet_s5-d16_64x64_40k_drive.py' +model = dict( + decode_head=dict(loss_decode=[ + dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0) + ])) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1.py new file mode 100644 index 0000000..b085a17 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/pspnet_unet_s5-d16.py', + '../_base_/datasets/chase_db1.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) +evaluation = dict(metric='mDice') diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/pspnet_unet_s5-d16_128x128_40k_stare.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/pspnet_unet_s5-d16_128x128_40k_stare.py new file mode 100644 index 0000000..9d729ce --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/pspnet_unet_s5-d16_128x128_40k_stare.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/pspnet_unet_s5-d16.py', '../_base_/datasets/stare.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(128, 128), stride=(85, 85))) +evaluation = dict(metric='mDice') diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/pspnet_unet_s5-d16_256x256_40k_hrf.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/pspnet_unet_s5-d16_256x256_40k_hrf.py new file mode 100644 index 0000000..f57c916 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/pspnet_unet_s5-d16_256x256_40k_hrf.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/pspnet_unet_s5-d16.py', '../_base_/datasets/hrf.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(256, 256), stride=(170, 170))) +evaluation = dict(metric='mDice') diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/pspnet_unet_s5-d16_64x64_40k_drive.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/pspnet_unet_s5-d16_64x64_40k_drive.py new file mode 100644 index 0000000..7b5421a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/pspnet_unet_s5-d16_64x64_40k_drive.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/pspnet_unet_s5-d16.py', '../_base_/datasets/drive.py', + '../_base_/default_runtime.py', 
'../_base_/schedules/schedule_40k.py' +] +model = dict(test_cfg=dict(crop_size=(64, 64), stride=(42, 42))) +evaluation = dict(metric='mDice') diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1.py new file mode 100644 index 0000000..a63dc11 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1.py @@ -0,0 +1,6 @@ +_base_ = './pspnet_unet_s5-d16_128x128_40k_chase_db1.py' +model = dict( + decode_head=dict(loss_decode=[ + dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0) + ])) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare.py new file mode 100644 index 0000000..1a3b665 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare.py @@ -0,0 +1,6 @@ +_base_ = './pspnet_unet_s5-d16_128x128_40k_stare.py' +model = dict( + decode_head=dict(loss_decode=[ + dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0) + ])) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf.py new file mode 100644 index 0000000..e19d6cf --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf.py @@ -0,0 +1,6 @@ +_base_ = './pspnet_unet_s5-d16_256x256_40k_hrf.py' +model = dict( + decode_head=dict(loss_decode=[ + dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0) + ])) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive.py new file mode 100644 index 0000000..7934923 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive.py @@ -0,0 +1,6 @@ +_base_ = './pspnet_unet_s5-d16_64x64_40k_drive.py' +model = dict( + decode_head=dict(loss_decode=[ + dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0) + ])) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/unet.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/unet.yml new file mode 100644 index 0000000..5bb5014 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/unet/unet.yml @@ -0,0 +1,377 @@ +Collections: +- Name: UNet + Metadata: + Training Data: + - Cityscapes + - DRIVE + - STARE + - CHASE_DB1 + - HRF + Paper: + URL: https://arxiv.org/abs/1505.04597 + Title: 'U-Net: Convolutional Networks for Biomedical Image Segmentation' + README: configs/unet/README.md + Code: + URL: 
https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/backbones/unet.py#L225 + Version: v0.17.0 + Converted From: + Code: http://lmb.informatik.uni-freiburg.de/people/ronneber/u-net +Models: +- Name: fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes + In Collection: UNet + Metadata: + backbone: UNet-S5-D16 + crop size: (512,1024) + lr schd: 160000 + inference time (ms/im): + - value: 327.87 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 17.91 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 69.1 + mIoU(ms+flip): 71.05 + Config: configs/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes/fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes_20211210_145204-6860854e.pth +- Name: fcn_unet_s5-d16_64x64_40k_drive + In Collection: UNet + Metadata: + backbone: UNet-S5-D16 + crop size: (64,64) + lr schd: 40000 + Training Memory (GB): 0.68 + Results: + - Task: Semantic Segmentation + Dataset: DRIVE + Metrics: + Dice: 78.67 + Config: configs/unet/fcn_unet_s5-d16_64x64_40k_drive.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_64x64_40k_drive/fcn_unet_s5-d16_64x64_40k_drive_20201223_191051-5daf6d3b.pth +- Name: fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive + In Collection: UNet + Metadata: + backbone: UNet-S5-D16 + crop size: (64,64) + lr schd: 40000 + Training Memory (GB): 0.582 + Results: + - Task: Semantic Segmentation + Dataset: DRIVE + Metrics: + Dice: 79.32 + Config: configs/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/fcn_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201820-785de5c2.pth +- Name: pspnet_unet_s5-d16_64x64_40k_drive + In Collection: UNet + Metadata: + backbone: UNet-S5-D16 + crop size: (64,64) + lr schd: 40000 + Training Memory (GB): 0.599 + Results: + - Task: Semantic Segmentation + Dataset: DRIVE + Metrics: + Dice: 78.62 + Config: configs/unet/pspnet_unet_s5-d16_64x64_40k_drive.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_64x64_40k_drive/pspnet_unet_s5-d16_64x64_40k_drive_20201227_181818-aac73387.pth +- Name: pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive + In Collection: UNet + Metadata: + backbone: UNet-S5-D16 + crop size: (64,64) + lr schd: 40000 + Training Memory (GB): 0.585 + Results: + - Task: Semantic Segmentation + Dataset: DRIVE + Metrics: + Dice: 79.42 + Config: configs/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/pspnet_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201821-22b3e3ba.pth +- Name: deeplabv3_unet_s5-d16_64x64_40k_drive + In Collection: UNet + Metadata: + backbone: UNet-S5-D16 + crop size: (64,64) + lr schd: 40000 + Training Memory (GB): 0.596 + Results: + - Task: Semantic Segmentation + Dataset: DRIVE + Metrics: + Dice: 78.69 + Config: configs/unet/deeplabv3_unet_s5-d16_64x64_40k_drive.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_64x64_40k_drive/deeplabv3_unet_s5-d16_64x64_40k_drive_20201226_094047-0671ff20.pth +- Name: deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive + In Collection: UNet + Metadata: + backbone: UNet-S5-D16 + crop size: 
(64,64) + lr schd: 40000 + Training Memory (GB): 0.582 + Results: + - Task: Semantic Segmentation + Dataset: DRIVE + Metrics: + Dice: 79.56 + Config: configs/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive_20211210_201825-6bf0efd7.pth +- Name: fcn_unet_s5-d16_128x128_40k_stare + In Collection: UNet + Metadata: + backbone: UNet-S5-D16 + crop size: (128,128) + lr schd: 40000 + Training Memory (GB): 0.968 + Results: + - Task: Semantic Segmentation + Dataset: STARE + Metrics: + Dice: 81.02 + Config: configs/unet/fcn_unet_s5-d16_128x128_40k_stare.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_128x128_40k_stare/fcn_unet_s5-d16_128x128_40k_stare_20201223_191051-7d77e78b.pth +- Name: fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare + In Collection: UNet + Metadata: + backbone: UNet-S5-D16 + crop size: (128,128) + lr schd: 40000 + Training Memory (GB): 0.986 + Results: + - Task: Semantic Segmentation + Dataset: STARE + Metrics: + Dice: 82.7 + Config: configs/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201821-f75705a9.pth +- Name: pspnet_unet_s5-d16_128x128_40k_stare + In Collection: UNet + Metadata: + backbone: UNet-S5-D16 + crop size: (128,128) + lr schd: 40000 + Training Memory (GB): 0.982 + Results: + - Task: Semantic Segmentation + Dataset: STARE + Metrics: + Dice: 81.22 + Config: configs/unet/pspnet_unet_s5-d16_128x128_40k_stare.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_stare/pspnet_unet_s5-d16_128x128_40k_stare_20201227_181818-3c2923c4.pth +- Name: pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare + In Collection: UNet + Metadata: + backbone: UNet-S5-D16 + crop size: (128,128) + lr schd: 40000 + Training Memory (GB): 1.028 + Results: + - Task: Semantic Segmentation + Dataset: STARE + Metrics: + Dice: 82.84 + Config: configs/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201823-f1063ef7.pth +- Name: deeplabv3_unet_s5-d16_128x128_40k_stare + In Collection: UNet + Metadata: + backbone: UNet-S5-D16 + crop size: (128,128) + lr schd: 40000 + Training Memory (GB): 0.999 + Results: + - Task: Semantic Segmentation + Dataset: STARE + Metrics: + Dice: 80.93 + Config: configs/unet/deeplabv3_unet_s5-d16_128x128_40k_stare.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_stare/deeplabv3_unet_s5-d16_128x128_40k_stare_20201226_094047-93dcb93c.pth +- Name: deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare + In Collection: UNet + Metadata: + backbone: UNet-S5-D16 + crop size: (128,128) + lr schd: 40000 + Training Memory (GB): 1.01 + Results: + - Task: Semantic Segmentation + Dataset: STARE + Metrics: + Dice: 82.71 + Config: configs/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_stare_20211210_201825-21db614c.pth +- Name: fcn_unet_s5-d16_128x128_40k_chase_db1 + In Collection: UNet + Metadata: + backbone: UNet-S5-D16 + crop size: (128,128) + lr schd: 40000 + Training Memory (GB): 0.968 + Results: + - Task: Semantic Segmentation + Dataset: CHASE_DB1 + Metrics: + Dice: 80.24 + Config: configs/unet/fcn_unet_s5-d16_128x128_40k_chase_db1.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_128x128_40k_chase_db1/fcn_unet_s5-d16_128x128_40k_chase_db1_20201223_191051-11543527.pth +- Name: fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1 + In Collection: UNet + Metadata: + backbone: UNet-S5-D16 + crop size: (128,128) + lr schd: 40000 + Training Memory (GB): 0.986 + Results: + - Task: Semantic Segmentation + Dataset: CHASE_DB1 + Metrics: + Dice: 80.4 + Config: configs/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/fcn_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201821-1c4eb7cf.pth +- Name: pspnet_unet_s5-d16_128x128_40k_chase_db1 + In Collection: UNet + Metadata: + backbone: UNet-S5-D16 + crop size: (128,128) + lr schd: 40000 + Training Memory (GB): 0.982 + Results: + - Task: Semantic Segmentation + Dataset: CHASE_DB1 + Metrics: + Dice: 80.36 + Config: configs/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_128x128_40k_chase_db1/pspnet_unet_s5-d16_128x128_40k_chase_db1_20201227_181818-68d4e609.pth +- Name: pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1 + In Collection: UNet + Metadata: + backbone: UNet-S5-D16 + crop size: (128,128) + lr schd: 40000 + Training Memory (GB): 1.028 + Results: + - Task: Semantic Segmentation + Dataset: CHASE_DB1 + Metrics: + Dice: 80.28 + Config: configs/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/pspnet_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201823-c0802c4d.pth +- Name: deeplabv3_unet_s5-d16_128x128_40k_chase_db1 + In Collection: UNet + Metadata: + backbone: UNet-S5-D16 + crop size: (128,128) + lr schd: 40000 + Training Memory (GB): 0.999 + Results: + - Task: Semantic Segmentation + Dataset: CHASE_DB1 + Metrics: + Dice: 80.47 + Config: configs/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_128x128_40k_chase_db1/deeplabv3_unet_s5-d16_128x128_40k_chase_db1_20201226_094047-4c5aefa3.pth +- Name: deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1 + In Collection: UNet + Metadata: + backbone: UNet-S5-D16 + crop size: (128,128) + lr schd: 40000 + Training Memory (GB): 1.01 + Results: + - Task: Semantic Segmentation + Dataset: CHASE_DB1 + Metrics: + Dice: 80.37 + Config: configs/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_128x128_40k_chase-db1_20211210_201825-4ef29df5.pth +- Name: fcn_unet_s5-d16_256x256_40k_hrf + In Collection: UNet + Metadata: + backbone: 
UNet-S5-D16 + crop size: (256,256) + lr schd: 40000 + Training Memory (GB): 2.525 + Results: + - Task: Semantic Segmentation + Dataset: HRF + Metrics: + Dice: 79.45 + Config: configs/unet/fcn_unet_s5-d16_256x256_40k_hrf.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_256x256_40k_hrf/fcn_unet_s5-d16_256x256_40k_hrf_20201223_173724-d89cf1ed.pth +- Name: fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf + In Collection: UNet + Metadata: + backbone: UNet-S5-D16 + crop size: (256,256) + lr schd: 40000 + Training Memory (GB): 2.623 + Results: + - Task: Semantic Segmentation + Dataset: HRF + Metrics: + Dice: 80.87 + Config: configs/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/fcn_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_201821-c314da8a.pth +- Name: pspnet_unet_s5-d16_256x256_40k_hrf + In Collection: UNet + Metadata: + backbone: UNet-S5-D16 + crop size: (256,256) + lr schd: 40000 + Training Memory (GB): 2.588 + Results: + - Task: Semantic Segmentation + Dataset: HRF + Metrics: + Dice: 80.07 + Config: configs/unet/pspnet_unet_s5-d16_256x256_40k_hrf.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_256x256_40k_hrf/pspnet_unet_s5-d16_256x256_40k_hrf_20201227_181818-fdb7e29b.pth +- Name: pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf + In Collection: UNet + Metadata: + backbone: UNet-S5-D16 + crop size: (256,256) + lr schd: 40000 + Training Memory (GB): 2.798 + Results: + - Task: Semantic Segmentation + Dataset: HRF + Metrics: + Dice: 80.96 + Config: configs/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/pspnet_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_201823-53d492fa.pth +- Name: deeplabv3_unet_s5-d16_256x256_40k_hrf + In Collection: UNet + Metadata: + backbone: UNet-S5-D16 + crop size: (256,256) + lr schd: 40000 + Training Memory (GB): 2.604 + Results: + - Task: Semantic Segmentation + Dataset: HRF + Metrics: + Dice: 80.21 + Config: configs/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_256x256_40k_hrf/deeplabv3_unet_s5-d16_256x256_40k_hrf_20201226_094047-3a1fdf85.pth +- Name: deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf + In Collection: UNet + Metadata: + backbone: UNet-S5-D16 + crop size: (256,256) + lr schd: 40000 + Training Memory (GB): 2.607 + Results: + - Task: Semantic Segmentation + Dataset: HRF + Metrics: + Dice: 80.71 + Config: configs/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_256x256_40k_hrf_20211210_202032-59daf7a4.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/README.md new file mode 100644 index 0000000..d398ddc --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/README.md @@ -0,0 +1,74 @@ +# UPerNet + +[Unified Perceptual Parsing for Scene Understanding](https://arxiv.org/pdf/1807.10221.pdf) + +## Introduction + + + +[Official Repo](https://github.com/CSAILVision/unifiedparsing) + +[Code Snippet](https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13) + +## Abstract + + + +Humans recognize the visual world at multiple levels: we
effortlessly categorize scenes and detect objects inside, while also identifying the textures and surfaces of the objects along with their different compositional parts. In this paper, we study a new task called Unified Perceptual Parsing, which requires the machine vision systems to recognize as many visual concepts as possible from a given image. A multi-task framework called UPerNet and a training strategy are developed to learn from heterogeneous image annotations. We benchmark our framework on Unified Perceptual Parsing and show that it is able to effectively segment a wide range of concepts from images. The trained networks are further applied to discover visual knowledge in natural scenes. Models are available at [this https URL](https://github.com/CSAILVision/unifiedparsing). + + + +
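The `loss_decode` lists in the UNet configs above all pair a pixel-wise cross-entropy term (loss_weight 1.0) with a Dice term (loss_weight 3.0), a combination that generally lifts the Dice scores on DRIVE, STARE, and HRF in the unet.yml entries that follow. A minimal plain-PyTorch sketch of what that weighted sum computes for a binary vessel mask — illustrative only, not the mmseg `DiceLoss` implementation, and the helper name `ce_plus_dice` is made up here:

```python
import torch
import torch.nn.functional as F

def ce_plus_dice(logits, target, ce_weight=1.0, dice_weight=3.0, eps=1e-5):
    """Weighted CE + soft Dice, mirroring loss_weight=1.0 / loss_weight=3.0.

    logits: (N, C, H, W) raw scores; target: (N, H, W) integer class map.
    """
    ce = F.cross_entropy(logits, target)
    prob = logits.softmax(dim=1)[:, 1]                 # foreground probability
    fg = (target == 1).float()
    inter = (prob * fg).sum(dim=(1, 2))
    denom = prob.sum(dim=(1, 2)) + fg.sum(dim=(1, 2))
    dice = 1.0 - (2.0 * inter + eps) / (denom + eps)   # soft Dice loss per image
    return ce_weight * ce + dice_weight * dice.mean()

# Toy call on a 64x64 crop, the DRIVE crop size used above.
logits = torch.randn(2, 2, 64, 64)
target = torch.randint(0, 2, (2, 64, 64))
print(ce_plus_dice(logits, target))
```

The heavy Dice weight pushes training toward overlap with the thin vessel foreground, which cross-entropy alone tends to underweight on such class-imbalanced masks.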
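Every config file in this patch also leans on the `_base_` inheritance that mmsegmentation v0.x takes from mmcv: a leaf config names its base fragments (model, dataset, runtime, schedule) and overrides only a few keys, which is why most of the files above run two to ten lines. A sketch of inspecting the merged result, assuming an mmcv/mmsegmentation v0.x environment with the vendored mmsegmentation directory as the working directory:

```python
from mmcv import Config  # mmseg v0.x configs are plain mmcv Config files

cfg = Config.fromfile(
    'configs/unet/deeplabv3_unet_s5-d16_ce-1.0-dice-3.0_64x64_40k_drive.py')

# The leaf file contributes only loss_decode; its parent contributes the
# test-time crop/stride and metric; the _base_ fragments supply the rest.
print(cfg.model.decode_head.loss_decode)  # CrossEntropyLoss (1.0) + DiceLoss (3.0)
print(cfg.model.test_cfg)                 # crop_size=(64, 64), stride=(42, 42)
print(cfg.evaluation)                     # {'metric': 'mDice'}
```

Note that the sliding-window stride is roughly two-thirds of the crop size in every retinal config above (42/64, 85/128, 170/256), so adjacent test windows overlap by about a third.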
+ +## Citation + +```bibtex +@inproceedings{xiao2018unified, + title={Unified perceptual parsing for scene understanding}, + author={Xiao, Tete and Liu, Yingcheng and Zhou, Bolei and Jiang, Yuning and Sun, Jian}, + booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, + pages={418--434}, + year={2018} +} +``` + +## Results and models + +### Cityscapes + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | -------------------------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UPerNet | R-18 | 512x1024 | 40000 | 4.8 | 4.47 | 75.39 | 77.0 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r18_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r18_512x1024_40k_cityscapes/upernet_r18_512x1024_40k_cityscapes_20220615_113231-12ee861d.pth) \|[log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r18_512x1024_40k_cityscapes/upernet_r18_512x1024_40k_cityscapes_20220615_113231.log.json) | +| UPerNet | R-50 | 512x1024 | 40000 | 6.4 | 4.25 | 77.10 | 78.37 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r50_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_40k_cityscapes/upernet_r50_512x1024_40k_cityscapes_20200605_094827-aa54cb54.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_40k_cityscapes/upernet_r50_512x1024_40k_cityscapes_20200605_094827.log.json) | +| UPerNet | R-101 | 512x1024 | 40000 | 7.4 | 3.79 | 78.69 | 80.11 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r101_512x1024_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_40k_cityscapes/upernet_r101_512x1024_40k_cityscapes_20200605_094933-ebce3b10.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_40k_cityscapes/upernet_r101_512x1024_40k_cityscapes_20200605_094933.log.json) | +| UPerNet | R-50 | 769x769 | 40000 | 7.2 | 1.76 | 77.98 | 79.70 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r50_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_40k_cityscapes/upernet_r50_769x769_40k_cityscapes_20200530_033048-92d21539.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_40k_cityscapes/upernet_r50_769x769_40k_cityscapes_20200530_033048.log.json) | +| UPerNet | R-101 | 769x769 | 40000 | 8.4 | 1.56 | 79.03 | 80.77 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r101_769x769_40k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_40k_cityscapes/upernet_r101_769x769_40k_cityscapes_20200530_040819-83c95d01.pth) \| 
[log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_40k_cityscapes/upernet_r101_769x769_40k_cityscapes_20200530_040819.log.json) | +| UPerNet | R-18 | 512x1024 | 80000 | - | - | 76.02 | 77.38 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r18_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r18_512x1024_80k_cityscapes/upernet_r18_512x1024_80k_cityscapes_20220614_110712-c89a9188.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r18_512x1024_80k_cityscapes/upernet_r18_512x1024_80k_cityscapes_20220614_110712.log.json) | +| UPerNet | R-50 | 512x1024 | 80000 | - | - | 78.19 | 79.19 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r50_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_80k_cityscapes/upernet_r50_512x1024_80k_cityscapes_20200607_052207-848beca8.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_80k_cityscapes/upernet_r50_512x1024_80k_cityscapes_20200607_052207.log.json) | +| UPerNet | R-101 | 512x1024 | 80000 | - | - | 79.40 | 80.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r101_512x1024_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_80k_cityscapes/upernet_r101_512x1024_80k_cityscapes_20200607_002403-f05f2345.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_80k_cityscapes/upernet_r101_512x1024_80k_cityscapes_20200607_002403.log.json) | +| UPerNet | R-50 | 769x769 | 80000 | - | - | 79.39 | 80.92 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r50_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_80k_cityscapes/upernet_r50_769x769_80k_cityscapes_20200607_005107-82ae7d15.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_80k_cityscapes/upernet_r50_769x769_80k_cityscapes_20200607_005107.log.json) | +| UPerNet | R-101 | 769x769 | 80000 | - | - | 80.10 | 81.49 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r101_769x769_80k_cityscapes.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_80k_cityscapes/upernet_r101_769x769_80k_cityscapes_20200607_001014-082fc334.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_80k_cityscapes/upernet_r101_769x769_80k_cityscapes_20200607_001014.log.json) | + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ---------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UPerNet | R-18 | 512x512 | 80000 | 6.6 | 24.76 | 38.76 | 39.81 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r18_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r18_512x512_80k_ade20k/upernet_r18_512x512_80k_ade20k_20220614_110319-22e81719.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r18_512x512_80k_ade20k/upernet_r18_512x512_80k_ade20k_20220614_110319.log.json) | +| UPerNet | R-50 | 512x512 | 80000 | 8.1 | 23.40 | 40.70 | 41.81 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r50_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_80k_ade20k/upernet_r50_512x512_80k_ade20k_20200614_144127-ecc8377b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_80k_ade20k/upernet_r50_512x512_80k_ade20k_20200614_144127.log.json) | +| UPerNet | R-101 | 512x512 | 80000 | 9.1 | 20.34 | 42.91 | 43.96 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r101_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_80k_ade20k/upernet_r101_512x512_80k_ade20k_20200614_185117-32e4db94.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_80k_ade20k/upernet_r101_512x512_80k_ade20k_20200614_185117.log.json) | +| UPerNet | R-18 | 512x512 | 160000 | - | - | 39.23 | 39.97 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r18_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r18_512x512_160k_ade20k/upernet_r18_512x512_160k_ade20k_20220615_113300-791c3f3e.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r18_512x512_160k_ade20k/upernet_r18_512x512_160k_ade20k_20220615_113300.log.json) | +| UPerNet | R-50 | 512x512 | 160000 | - | - | 42.05 | 42.78 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r50_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_160k_ade20k/upernet_r50_512x512_160k_ade20k_20200615_184328-8534de8d.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_160k_ade20k/upernet_r50_512x512_160k_ade20k_20200615_184328.log.json) | +| UPerNet | R-101 | 512x512 | 160000 | - | - | 43.82 | 44.85 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r101_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_160k_ade20k/upernet_r101_512x512_160k_ade20k_20200615_161951-91b32684.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_160k_ade20k/upernet_r101_512x512_160k_ade20k_20200615_161951.log.json) | + +### Pascal VOC 2012 + Aug + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------- | -------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ----------------------------------------------------------------------------------------------------------------------- | 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| UPerNet | R-18 | 512x512 | 20000 | 4.8 | 25.80 | 72.9 | 74.71 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r18_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r18_512x512_20k_voc12aug/upernet_r18_512x512_20k_voc12aug_20220614_123910-ed66e455.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r18_512x512_20k_voc12aug/upernet_r18_512x512_20k_voc12aug_20220614_123910.log.json) | +| UPerNet | R-50 | 512x512 | 20000 | 6.4 | 23.17 | 74.82 | 76.35 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r50_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_20k_voc12aug/upernet_r50_512x512_20k_voc12aug_20200617_165330-5b5890a7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_20k_voc12aug/upernet_r50_512x512_20k_voc12aug_20200617_165330.log.json) | +| UPerNet | R-101 | 512x512 | 20000 | 7.5 | 19.98 | 77.10 | 78.29 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r101_512x512_20k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_20k_voc12aug/upernet_r101_512x512_20k_voc12aug_20200617_165629-f14e7f27.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_20k_voc12aug/upernet_r101_512x512_20k_voc12aug_20200617_165629.log.json) | +| UPerNet | R-18 | 512x512 | 40000 | - | - | 73.71 | 74.61 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r18_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r18_512x512_40k_voc12aug/upernet_r18_512x512_40k_voc12aug_20220614_153605-fafeb868.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r18_512x512_40k_voc12aug/upernet_r18_512x512_40k_voc12aug_20220614_153605.log.json) | +| UPerNet | R-50 | 512x512 | 40000 | - | - | 75.92 | 77.44 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r50_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_40k_voc12aug/upernet_r50_512x512_40k_voc12aug_20200613_162257-ca9bcc6b.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_40k_voc12aug/upernet_r50_512x512_40k_voc12aug_20200613_162257.log.json) | +| UPerNet | R-101 | 512x512 | 40000 | - | - | 77.43 | 78.56 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet/upernet_r101_512x512_40k_voc12aug.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_40k_voc12aug/upernet_r101_512x512_40k_voc12aug_20200613_163549-e26476ac.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_40k_voc12aug/upernet_r101_512x512_40k_voc12aug_20200613_163549.log.json) | diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet.yml 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet.yml new file mode 100644 index 0000000..0d82e72 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet.yml @@ -0,0 +1,413 @@ +Collections: +- Name: UPerNet + Metadata: + Training Data: + - Cityscapes + - ADE20K + - Pascal VOC 2012 + Aug + Paper: + URL: https://arxiv.org/pdf/1807.10221.pdf + Title: Unified Perceptual Parsing for Scene Understanding + README: configs/upernet/README.md + Code: + URL: https://github.com/open-mmlab/mmsegmentation/blob/v0.17.0/mmseg/models/decode_heads/uper_head.py#L13 + Version: v0.17.0 + Converted From: + Code: https://github.com/CSAILVision/unifiedparsing +Models: +- Name: upernet_r18_512x1024_40k_cityscapes + In Collection: UPerNet + Metadata: + backbone: R-18 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 223.71 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 4.8 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 75.39 + mIoU(ms+flip): 77.0 + Config: configs/upernet/upernet_r18_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r18_512x1024_40k_cityscapes/upernet_r18_512x1024_40k_cityscapes_20220615_113231-12ee861d.pth +- Name: upernet_r50_512x1024_40k_cityscapes + In Collection: UPerNet + Metadata: + backbone: R-50 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 235.29 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 6.4 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.1 + mIoU(ms+flip): 78.37 + Config: configs/upernet/upernet_r50_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_40k_cityscapes/upernet_r50_512x1024_40k_cityscapes_20200605_094827-aa54cb54.pth +- Name: upernet_r101_512x1024_40k_cityscapes + In Collection: UPerNet + Metadata: + backbone: R-101 + crop size: (512,1024) + lr schd: 40000 + inference time (ms/im): + - value: 263.85 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,1024) + Training Memory (GB): 7.4 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.69 + mIoU(ms+flip): 80.11 + Config: configs/upernet/upernet_r101_512x1024_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_40k_cityscapes/upernet_r101_512x1024_40k_cityscapes_20200605_094933-ebce3b10.pth +- Name: upernet_r50_769x769_40k_cityscapes + In Collection: UPerNet + Metadata: + backbone: R-50 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - value: 568.18 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (769,769) + Training Memory (GB): 7.2 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 77.98 + mIoU(ms+flip): 79.7 + Config: configs/upernet/upernet_r50_769x769_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_40k_cityscapes/upernet_r50_769x769_40k_cityscapes_20200530_033048-92d21539.pth +- Name: upernet_r101_769x769_40k_cityscapes + In Collection: UPerNet + Metadata: + backbone: R-101 + crop size: (769,769) + lr schd: 40000 + inference time (ms/im): + - value: 641.03 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: 
FP32 + resolution: (769,769) + Training Memory (GB): 8.4 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.03 + mIoU(ms+flip): 80.77 + Config: configs/upernet/upernet_r101_769x769_40k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_40k_cityscapes/upernet_r101_769x769_40k_cityscapes_20200530_040819-83c95d01.pth +- Name: upernet_r18_512x1024_80k_cityscapes + In Collection: UPerNet + Metadata: + backbone: R-18 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 76.02 + mIoU(ms+flip): 77.38 + Config: configs/upernet/upernet_r18_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r18_512x1024_80k_cityscapes/upernet_r18_512x1024_80k_cityscapes_20220614_110712-c89a9188.pth +- Name: upernet_r50_512x1024_80k_cityscapes + In Collection: UPerNet + Metadata: + backbone: R-50 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 78.19 + mIoU(ms+flip): 79.19 + Config: configs/upernet/upernet_r50_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x1024_80k_cityscapes/upernet_r50_512x1024_80k_cityscapes_20200607_052207-848beca8.pth +- Name: upernet_r101_512x1024_80k_cityscapes + In Collection: UPerNet + Metadata: + backbone: R-101 + crop size: (512,1024) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.4 + mIoU(ms+flip): 80.46 + Config: configs/upernet/upernet_r101_512x1024_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x1024_80k_cityscapes/upernet_r101_512x1024_80k_cityscapes_20200607_002403-f05f2345.pth +- Name: upernet_r50_769x769_80k_cityscapes + In Collection: UPerNet + Metadata: + backbone: R-50 + crop size: (769,769) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 79.39 + mIoU(ms+flip): 80.92 + Config: configs/upernet/upernet_r50_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_769x769_80k_cityscapes/upernet_r50_769x769_80k_cityscapes_20200607_005107-82ae7d15.pth +- Name: upernet_r101_769x769_80k_cityscapes + In Collection: UPerNet + Metadata: + backbone: R-101 + crop size: (769,769) + lr schd: 80000 + Results: + - Task: Semantic Segmentation + Dataset: Cityscapes + Metrics: + mIoU: 80.1 + mIoU(ms+flip): 81.49 + Config: configs/upernet/upernet_r101_769x769_80k_cityscapes.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_769x769_80k_cityscapes/upernet_r101_769x769_80k_cityscapes_20200607_001014-082fc334.pth +- Name: upernet_r18_512x512_80k_ade20k + In Collection: UPerNet + Metadata: + backbone: R-18 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 40.39 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 6.6 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 38.76 + mIoU(ms+flip): 39.81 + Config: configs/upernet/upernet_r18_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r18_512x512_80k_ade20k/upernet_r18_512x512_80k_ade20k_20220614_110319-22e81719.pth +- Name: upernet_r50_512x512_80k_ade20k + In Collection: UPerNet + Metadata: + 
backbone: R-50 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 42.74 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 8.1 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 40.7 + mIoU(ms+flip): 41.81 + Config: configs/upernet/upernet_r50_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_80k_ade20k/upernet_r50_512x512_80k_ade20k_20200614_144127-ecc8377b.pth +- Name: upernet_r101_512x512_80k_ade20k + In Collection: UPerNet + Metadata: + backbone: R-101 + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 49.16 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.1 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.91 + mIoU(ms+flip): 43.96 + Config: configs/upernet/upernet_r101_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_80k_ade20k/upernet_r101_512x512_80k_ade20k_20200614_185117-32e4db94.pth +- Name: upernet_r18_512x512_160k_ade20k + In Collection: UPerNet + Metadata: + backbone: R-18 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 39.23 + mIoU(ms+flip): 39.97 + Config: configs/upernet/upernet_r18_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r18_512x512_160k_ade20k/upernet_r18_512x512_160k_ade20k_20220615_113300-791c3f3e.pth +- Name: upernet_r50_512x512_160k_ade20k + In Collection: UPerNet + Metadata: + backbone: R-50 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.05 + mIoU(ms+flip): 42.78 + Config: configs/upernet/upernet_r50_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_160k_ade20k/upernet_r50_512x512_160k_ade20k_20200615_184328-8534de8d.pth +- Name: upernet_r101_512x512_160k_ade20k + In Collection: UPerNet + Metadata: + backbone: R-101 + crop size: (512,512) + lr schd: 160000 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.82 + mIoU(ms+flip): 44.85 + Config: configs/upernet/upernet_r101_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_160k_ade20k/upernet_r101_512x512_160k_ade20k_20200615_161951-91b32684.pth +- Name: upernet_r18_512x512_20k_voc12aug + In Collection: UPerNet + Metadata: + backbone: R-18 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 38.76 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 4.8 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 72.9 + mIoU(ms+flip): 74.71 + Config: configs/upernet/upernet_r18_512x512_20k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r18_512x512_20k_voc12aug/upernet_r18_512x512_20k_voc12aug_20220614_123910-ed66e455.pth +- Name: upernet_r50_512x512_20k_voc12aug + In Collection: UPerNet + Metadata: + backbone: R-50 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 43.16 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 6.4 + Results: + - Task: 
Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 74.82 + mIoU(ms+flip): 76.35 + Config: configs/upernet/upernet_r50_512x512_20k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_20k_voc12aug/upernet_r50_512x512_20k_voc12aug_20200617_165330-5b5890a7.pth +- Name: upernet_r101_512x512_20k_voc12aug + In Collection: UPerNet + Metadata: + backbone: R-101 + crop size: (512,512) + lr schd: 20000 + inference time (ms/im): + - value: 50.05 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 7.5 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.1 + mIoU(ms+flip): 78.29 + Config: configs/upernet/upernet_r101_512x512_20k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_20k_voc12aug/upernet_r101_512x512_20k_voc12aug_20200617_165629-f14e7f27.pth +- Name: upernet_r18_512x512_40k_voc12aug + In Collection: UPerNet + Metadata: + backbone: R-18 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 73.71 + mIoU(ms+flip): 74.61 + Config: configs/upernet/upernet_r18_512x512_40k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r18_512x512_40k_voc12aug/upernet_r18_512x512_40k_voc12aug_20220614_153605-fafeb868.pth +- Name: upernet_r50_512x512_40k_voc12aug + In Collection: UPerNet + Metadata: + backbone: R-50 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 75.92 + mIoU(ms+flip): 77.44 + Config: configs/upernet/upernet_r50_512x512_40k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r50_512x512_40k_voc12aug/upernet_r50_512x512_40k_voc12aug_20200613_162257-ca9bcc6b.pth +- Name: upernet_r101_512x512_40k_voc12aug + In Collection: UPerNet + Metadata: + backbone: R-101 + crop size: (512,512) + lr schd: 40000 + Results: + - Task: Semantic Segmentation + Dataset: Pascal VOC 2012 + Aug + Metrics: + mIoU: 77.43 + mIoU(ms+flip): 78.56 + Config: configs/upernet/upernet_r101_512x512_40k_voc12aug.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/upernet/upernet_r101_512x512_40k_voc12aug/upernet_r101_512x512_40k_voc12aug_20200613_163549-e26476ac.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r101_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r101_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..b90b597 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r101_512x1024_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_512x1024_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r101_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r101_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..420ca2e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r101_512x1024_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_512x1024_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git 
a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r101_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r101_512x512_160k_ade20k.py new file mode 100644 index 0000000..146f13e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r101_512x512_160k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_512x512_160k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r101_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r101_512x512_20k_voc12aug.py new file mode 100644 index 0000000..56345d1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r101_512x512_20k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_512x512_20k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r101_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r101_512x512_40k_voc12aug.py new file mode 100644 index 0000000..0669b74 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r101_512x512_40k_voc12aug.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_512x512_40k_voc12aug.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r101_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r101_512x512_80k_ade20k.py new file mode 100644 index 0000000..abfb9c5 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r101_512x512_80k_ade20k.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_512x512_80k_ade20k.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r101_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r101_769x769_40k_cityscapes.py new file mode 100644 index 0000000..e5f3a3f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r101_769x769_40k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_769x769_40k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r101_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r101_769x769_80k_cityscapes.py new file mode 100644 index 0000000..a709165 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r101_769x769_80k_cityscapes.py @@ -0,0 +1,2 @@ +_base_ = './upernet_r50_769x769_80k_cityscapes.py' +model = dict(pretrained='open-mmlab://resnet101_v1c', backbone=dict(depth=101)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r18_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r18_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..f5aec1f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r18_512x1024_40k_cityscapes.py @@ -0,0 +1,6 @@ 
+_base_ = './upernet_r50_512x1024_40k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict(in_channels=[64, 128, 256, 512]), + auxiliary_head=dict(in_channels=256)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r18_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r18_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..444f362 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r18_512x1024_80k_cityscapes.py @@ -0,0 +1,6 @@ +_base_ = './upernet_r50_512x1024_80k_cityscapes.py' +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict(in_channels=[64, 128, 256, 512]), + auxiliary_head=dict(in_channels=256)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r18_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r18_512x512_160k_ade20k.py new file mode 100644 index 0000000..9ac6c35 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r18_512x512_160k_ade20k.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict(in_channels=[64, 128, 256, 512], num_classes=150), + auxiliary_head=dict(in_channels=256, num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r18_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r18_512x512_20k_voc12aug.py new file mode 100644 index 0000000..5cae4f5 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r18_512x512_20k_voc12aug.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict(in_channels=[64, 128, 256, 512], num_classes=21), + auxiliary_head=dict(in_channels=256, num_classes=21)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r18_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r18_512x512_40k_voc12aug.py new file mode 100644 index 0000000..652ded7 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r18_512x512_40k_voc12aug.py @@ -0,0 +1,10 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict(in_channels=[64, 128, 256, 512], num_classes=21), + auxiliary_head=dict(in_channels=256, num_classes=21)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r18_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r18_512x512_80k_ade20k.py new file mode 100644 index 0000000..1a7956d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r18_512x512_80k_ade20k.py @@ -0,0 +1,9 @@ +_base_ = 
[ + '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + pretrained='open-mmlab://resnet18_v1c', + backbone=dict(depth=18), + decode_head=dict(in_channels=[64, 128, 256, 512], num_classes=150), + auxiliary_head=dict(in_channels=256, num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r50_512x1024_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r50_512x1024_40k_cityscapes.py new file mode 100644 index 0000000..d621e89 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r50_512x1024_40k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_40k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r50_512x1024_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r50_512x1024_80k_cityscapes.py new file mode 100644 index 0000000..95fffcc --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r50_512x1024_80k_cityscapes.py @@ -0,0 +1,4 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r50_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r50_512x512_160k_ade20k.py new file mode 100644 index 0000000..f5dd9aa --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r50_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r50_512x512_20k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r50_512x512_20k_voc12aug.py new file mode 100644 index 0000000..95f5c09 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r50_512x512_20k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_20k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r50_512x512_40k_voc12aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r50_512x512_40k_voc12aug.py new file mode 100644 index 0000000..9621fd1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r50_512x512_40k_voc12aug.py @@ -0,0 +1,7 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', + '../_base_/datasets/pascal_voc12_aug.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(num_classes=21), auxiliary_head=dict(num_classes=21)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r50_512x512_80k_ade20k.py 
b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r50_512x512_80k_ade20k.py new file mode 100644 index 0000000..f561e30 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r50_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', + '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(num_classes=150), auxiliary_head=dict(num_classes=150)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r50_769x769_40k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r50_769x769_40k_cityscapes.py new file mode 100644 index 0000000..89b18aa --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r50_769x769_40k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_40k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r50_769x769_80k_cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r50_769x769_80k_cityscapes.py new file mode 100644 index 0000000..29af98f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/upernet/upernet_r50_769x769_80k_cityscapes.py @@ -0,0 +1,9 @@ +_base_ = [ + '../_base_/models/upernet_r50.py', + '../_base_/datasets/cityscapes_769x769.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] +model = dict( + decode_head=dict(align_corners=True), + auxiliary_head=dict(align_corners=True), + test_cfg=dict(mode='slide', crop_size=(769, 769), stride=(513, 513))) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/README.md b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/README.md new file mode 100644 index 0000000..bfa20f4 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/README.md @@ -0,0 +1,70 @@ +# Vision Transformer + +[An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale](https://arxiv.org/pdf/2010.11929.pdf) + +## Introduction + + + +Official Repo + +Code Snippet + +## Abstract + + + +While the Transformer architecture has become the de-facto standard for natural language processing tasks, its applications to computer vision remain limited. In vision, attention is either applied in conjunction with convolutional networks, or used to replace certain components of convolutional networks while keeping their overall structure in place. We show that this reliance on CNNs is not necessary and a pure transformer applied directly to sequences of image patches can perform very well on image classification tasks. When pre-trained on large amounts of data and transferred to multiple mid-sized or small image recognition benchmarks (ImageNet, CIFAR-100, VTAB, etc.), Vision Transformer (ViT) attains excellent results compared to state-of-the-art convolutional networks while requiring substantially fewer computational resources to train. + + + +
+ +
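+A minimal PyTorch sketch of the idea described above (illustrative only, not MMSegmentation code): the image is cut into fixed-size patches, each patch is linearly embedded, and the resulting token sequence is fed to a standard Transformer encoder. All shapes and layer counts here are assumptions chosen for brevity.
+
+```python
+import torch
+import torch.nn as nn
+
+class PatchEmbed(nn.Module):
+    """Embed an image as a sequence of 16x16 patch tokens."""
+
+    def __init__(self, patch_size=16, in_chans=3, embed_dim=768):
+        super().__init__()
+        # A strided conv is equivalent to splitting into patches and
+        # applying a shared linear projection to each patch.
+        self.proj = nn.Conv2d(in_chans, embed_dim,
+                              kernel_size=patch_size, stride=patch_size)
+
+    def forward(self, x):                    # (B, 3, 224, 224)
+        x = self.proj(x)                     # (B, 768, 14, 14)
+        return x.flatten(2).transpose(1, 2)  # (B, 196, 768)
+
+tokens = PatchEmbed()(torch.randn(1, 3, 224, 224))
+encoder = nn.TransformerEncoder(
+    nn.TransformerEncoderLayer(d_model=768, nhead=12, batch_first=True),
+    num_layers=2)  # ViT-B uses 12 layers; 2 keeps the example fast
+print(encoder(tokens).shape)                 # torch.Size([1, 196, 768])
+```
+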
+ +## Citation + +```bibtex +@article{dosovitskiy2020, + title={An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale}, + author={Dosovitskiy, Alexey and Beyer, Lucas and Kolesnikov, Alexander and Weissenborn, Dirk and Zhai, Xiaohua and Unterthiner, Thomas and Dehghani, Mostafa and Minderer, Matthias and Heigold, Georg and Gelly, Sylvain and Uszkoreit, Jakob and Houlsby, Neil}, + journal={arXiv preprint arXiv:2010.11929}, + year={2020} +} +``` + +## Usage + +To use other repositories' pre-trained models, it is necessary to convert the keys first. + +We provide a script [`vit2mmseg.py`](../../tools/model_converters/vit2mmseg.py) in the tools directory to convert the keys of models from [timm](https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py) to MMSegmentation style. + +```shell +python tools/model_converters/vit2mmseg.py ${PRETRAIN_PATH} ${STORE_PATH} +``` + +E.g. + +```shell +python tools/model_converters/vit2mmseg.py https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth pretrain/jx_vit_base_p16_224-80ecf9dd.pth +``` + +This script converts the model from `PRETRAIN_PATH` and stores the converted model in `STORE_PATH`. + +## Results and models + +### ADE20K + +| Method | Backbone | Crop Size | Lr schd | Mem (GB) | Inf time (fps) | mIoU | mIoU(ms+flip) | config | download | +| ------- | ----------------- | --------- | ------: | -------- | -------------- | ----: | ------------: | ------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| UPerNet | ViT-B + MLN | 512x512 | 80000 | 9.20 | 6.94 | 47.71 | 49.51 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_vit-b16_mln_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_80k_ade20k/upernet_vit-b16_mln_512x512_80k_ade20k_20210624_130547-0403cee1.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_80k_ade20k/20210624_130547.log.json) | +| UPerNet | ViT-B + MLN | 512x512 | 160000 | 9.20 | 7.58 | 46.75 | 48.46 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_vit-b16_mln_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_160k_ade20k/upernet_vit-b16_mln_512x512_160k_ade20k_20210624_130547-852fa768.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_160k_ade20k/20210623_192432.log.json) | +| UPerNet | ViT-B + LN + MLN | 512x512 | 160000 | 9.21 | 6.82 | 47.73 | 49.95 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k/upernet_vit-b16_ln_mln_512x512_160k_ade20k_20210621_172828-f444c077.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k/20210621_172828.log.json) | +| UPerNet | DeiT-S | 512x512 | 80000 | 4.68 | 29.85 | 42.96 | 43.79 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_80k_ade20k/upernet_deit-s16_512x512_80k_ade20k_20210624_095228-afc93ec2.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_80k_ade20k/20210624_095228.log.json) | +| UPerNet | DeiT-S | 512x512 | 160000 | 4.68 | 29.19 | 42.87 | 43.79 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_160k_ade20k/upernet_deit-s16_512x512_160k_ade20k_20210621_160903-5110d916.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_160k_ade20k/20210621_160903.log.json) | +| UPerNet | DeiT-S + MLN | 512x512 | 160000 | 5.69 | 11.18 | 43.82 | 45.07 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_mln_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_mln_512x512_160k_ade20k/upernet_deit-s16_mln_512x512_160k_ade20k_20210621_161021-fb9a5dfb.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_mln_512x512_160k_ade20k/20210621_161021.log.json) | +| UPerNet | DeiT-S + LN + MLN | 512x512 | 160000 | 5.69 | 12.39 | 43.52 | 45.01 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k/upernet_deit-s16_ln_mln_512x512_160k_ade20k_20210621_161021-c0cd652f.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k/20210621_161021.log.json) | +| UPerNet | DeiT-B | 512x512 | 80000 | 7.75 | 9.69 | 45.24 | 46.73 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_80k_ade20k/upernet_deit-b16_512x512_80k_ade20k_20210624_130529-1e090789.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_80k_ade20k/20210624_130529.log.json) | +| UPerNet | DeiT-B | 512x512 | 160000 | 7.75 | 10.39 | 45.36 | 47.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_160k_ade20k/upernet_deit-b16_512x512_160k_ade20k_20210621_180100-828705d7.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_160k_ade20k/20210621_180100.log.json) | +| UPerNet | DeiT-B + MLN | 512x512 | 160000 | 9.21 | 7.78 | 45.46 | 47.16 | [config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_mln_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_mln_512x512_160k_ade20k/upernet_deit-b16_mln_512x512_160k_ade20k_20210621_191949-4e1450f3.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_mln_512x512_160k_ade20k/20210621_191949.log.json) | +| UPerNet | DeiT-B + LN + MLN | 512x512 | 160000 | 9.21 | 7.75 | 45.37 | 47.23 | 
[config](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k.py) | [model](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k/upernet_deit-b16_ln_mln_512x512_160k_ade20k_20210623_153535-8a959c14.pth) \| [log](https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k/20210623_153535.log.json) | diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py new file mode 100644 index 0000000..68f4bd4 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_deit-b16_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = './upernet_vit-b16_mln_512x512_160k_ade20k.py' + +model = dict( + pretrained='pretrain/deit_base_patch16_224-b5f2ef4d.pth', + backbone=dict(drop_path_rate=0.1), + neck=None) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py new file mode 100644 index 0000000..7204826 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_deit-b16_512x512_80k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = './upernet_vit-b16_mln_512x512_80k_ade20k.py' + +model = dict( + pretrained='pretrain/deit_base_patch16_224-b5f2ef4d.pth', + backbone=dict(drop_path_rate=0.1), + neck=None) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k.py new file mode 100644 index 0000000..32909ff --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k.py @@ -0,0 +1,5 @@ +_base_ = './upernet_vit-b16_mln_512x512_160k_ade20k.py' + +model = dict( + pretrained='pretrain/deit_base_patch16_224-b5f2ef4d.pth', + backbone=dict(drop_path_rate=0.1, final_norm=True)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_deit-b16_mln_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_deit-b16_mln_512x512_160k_ade20k.py new file mode 100644 index 0000000..4abefe8 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_deit-b16_mln_512x512_160k_ade20k.py @@ -0,0 +1,6 @@ +_base_ = './upernet_vit-b16_mln_512x512_160k_ade20k.py' + +model = dict( + pretrained='pretrain/deit_base_patch16_224-b5f2ef4d.pth', + backbone=dict(drop_path_rate=0.1), +) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py new file mode 100644 index 0000000..290ff19 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_deit-s16_512x512_160k_ade20k.py @@ -0,0 +1,8 @@ +_base_ = './upernet_vit-b16_mln_512x512_160k_ade20k.py' + +model = dict( + pretrained='pretrain/deit_small_patch16_224-cd65a155.pth', + backbone=dict(num_heads=6, embed_dims=384, drop_path_rate=0.1), + decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), + neck=None, + auxiliary_head=dict(num_classes=150, in_channels=384)) diff --git 
a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py new file mode 100644 index 0000000..605d264 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_deit-s16_512x512_80k_ade20k.py @@ -0,0 +1,8 @@ +_base_ = './upernet_vit-b16_mln_512x512_80k_ade20k.py' + +model = dict( + pretrained='pretrain/deit_small_patch16_224-cd65a155.pth', + backbone=dict(num_heads=6, embed_dims=384, drop_path_rate=0.1), + decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), + neck=None, + auxiliary_head=dict(num_classes=150, in_channels=384)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k.py new file mode 100644 index 0000000..ef743a2 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k.py @@ -0,0 +1,9 @@ +_base_ = './upernet_vit-b16_mln_512x512_160k_ade20k.py' + +model = dict( + pretrained='pretrain/deit_small_patch16_224-cd65a155.pth', + backbone=dict( + num_heads=6, embed_dims=384, drop_path_rate=0.1, final_norm=True), + decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), + neck=dict(in_channels=[384, 384, 384, 384], out_channels=384), + auxiliary_head=dict(num_classes=150, in_channels=384)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_deit-s16_mln_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_deit-s16_mln_512x512_160k_ade20k.py new file mode 100644 index 0000000..069cab7 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_deit-s16_mln_512x512_160k_ade20k.py @@ -0,0 +1,8 @@ +_base_ = './upernet_vit-b16_mln_512x512_160k_ade20k.py' + +model = dict( + pretrained='pretrain/deit_small_patch16_224-cd65a155.pth', + backbone=dict(num_heads=6, embed_dims=384, drop_path_rate=0.1), + decode_head=dict(num_classes=150, in_channels=[384, 384, 384, 384]), + neck=dict(in_channels=[384, 384, 384, 384], out_channels=384), + auxiliary_head=dict(num_classes=150, in_channels=384)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py new file mode 100644 index 0000000..51eeda0 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py @@ -0,0 +1,39 @@ +_base_ = [ + '../_base_/models/upernet_vit-b16_ln_mln.py', + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] + +model = dict( + pretrained='pretrain/vit_base_patch16_224.pth', + backbone=dict(drop_path_rate=0.1, final_norm=True), + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
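+            # decay_mult=0. zeroes out weight decay for any parameter whose
+            # name contains one of these substrings, i.e. the position
+            # embedding, class token and LayerNorm parameters noted above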
+ })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_vit-b16_mln_512x512_160k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_vit-b16_mln_512x512_160k_ade20k.py new file mode 100644 index 0000000..5b148d7 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_vit-b16_mln_512x512_160k_ade20k.py @@ -0,0 +1,38 @@ +_base_ = [ + '../_base_/models/upernet_vit-b16_ln_mln.py', + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_160k.py' +] + +model = dict( + pretrained='pretrain/vit_base_patch16_224.pth', + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) + })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_vit-b16_mln_512x512_80k_ade20k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_vit-b16_mln_512x512_80k_ade20k.py new file mode 100644 index 0000000..f893500 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/upernet_vit-b16_mln_512x512_80k_ade20k.py @@ -0,0 +1,38 @@ +_base_ = [ + '../_base_/models/upernet_vit-b16_ln_mln.py', + '../_base_/datasets/ade20k.py', '../_base_/default_runtime.py', + '../_base_/schedules/schedule_80k.py' +] + +model = dict( + pretrained='pretrain/vit_base_patch16_224.pth', + decode_head=dict(num_classes=150), + auxiliary_head=dict(num_classes=150)) + +# AdamW optimizer, no weight decay for position embedding & layer norm +# in backbone +optimizer = dict( + _delete_=True, + type='AdamW', + lr=0.00006, + betas=(0.9, 0.999), + weight_decay=0.01, + paramwise_cfg=dict( + custom_keys={ + 'pos_embed': dict(decay_mult=0.), + 'cls_token': dict(decay_mult=0.), + 'norm': dict(decay_mult=0.) 
+ })) + +lr_config = dict( + _delete_=True, + policy='poly', + warmup='linear', + warmup_iters=1500, + warmup_ratio=1e-6, + power=1.0, + min_lr=0.0, + by_epoch=False) + +# By default, models are trained on 8 GPUs with 2 images per GPU +data = dict(samples_per_gpu=2) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/vit.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/vit.yml new file mode 100644 index 0000000..35e4952 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/configs/vit/vit.yml @@ -0,0 +1,243 @@ +Models: +- Name: upernet_vit-b16_mln_512x512_80k_ade20k + In Collection: UPerNet + Metadata: + backbone: ViT-B + MLN + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 144.09 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.2 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 47.71 + mIoU(ms+flip): 49.51 + Config: configs/vit/upernet_vit-b16_mln_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_80k_ade20k/upernet_vit-b16_mln_512x512_80k_ade20k_20210624_130547-0403cee1.pth +- Name: upernet_vit-b16_mln_512x512_160k_ade20k + In Collection: UPerNet + Metadata: + backbone: ViT-B + MLN + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 131.93 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.2 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 46.75 + mIoU(ms+flip): 48.46 + Config: configs/vit/upernet_vit-b16_mln_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_mln_512x512_160k_ade20k/upernet_vit-b16_mln_512x512_160k_ade20k_20210624_130547-852fa768.pth +- Name: upernet_vit-b16_ln_mln_512x512_160k_ade20k + In Collection: UPerNet + Metadata: + backbone: ViT-B + LN + MLN + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 146.63 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.21 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 47.73 + mIoU(ms+flip): 49.95 + Config: configs/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_vit-b16_ln_mln_512x512_160k_ade20k/upernet_vit-b16_ln_mln_512x512_160k_ade20k_20210621_172828-f444c077.pth +- Name: upernet_deit-s16_512x512_80k_ade20k + In Collection: UPerNet + Metadata: + backbone: DeiT-S + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 33.5 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 4.68 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 42.96 + mIoU(ms+flip): 43.79 + Config: configs/vit/upernet_deit-s16_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_80k_ade20k/upernet_deit-s16_512x512_80k_ade20k_20210624_095228-afc93ec2.pth +- Name: upernet_deit-s16_512x512_160k_ade20k + In Collection: UPerNet + Metadata: + backbone: DeiT-S + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 34.26 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 4.68 + Results: + - Task: Semantic Segmentation 
+ Dataset: ADE20K + Metrics: + mIoU: 42.87 + mIoU(ms+flip): 43.79 + Config: configs/vit/upernet_deit-s16_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_512x512_160k_ade20k/upernet_deit-s16_512x512_160k_ade20k_20210621_160903-5110d916.pth +- Name: upernet_deit-s16_mln_512x512_160k_ade20k + In Collection: UPerNet + Metadata: + backbone: DeiT-S + MLN + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 89.45 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 5.69 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.82 + mIoU(ms+flip): 45.07 + Config: configs/vit/upernet_deit-s16_mln_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_mln_512x512_160k_ade20k/upernet_deit-s16_mln_512x512_160k_ade20k_20210621_161021-fb9a5dfb.pth +- Name: upernet_deit-s16_ln_mln_512x512_160k_ade20k + In Collection: UPerNet + Metadata: + backbone: DeiT-S + LN + MLN + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 80.71 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 5.69 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 43.52 + mIoU(ms+flip): 45.01 + Config: configs/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-s16_ln_mln_512x512_160k_ade20k/upernet_deit-s16_ln_mln_512x512_160k_ade20k_20210621_161021-c0cd652f.pth +- Name: upernet_deit-b16_512x512_80k_ade20k + In Collection: UPerNet + Metadata: + backbone: DeiT-B + crop size: (512,512) + lr schd: 80000 + inference time (ms/im): + - value: 103.2 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 7.75 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.24 + mIoU(ms+flip): 46.73 + Config: configs/vit/upernet_deit-b16_512x512_80k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_80k_ade20k/upernet_deit-b16_512x512_80k_ade20k_20210624_130529-1e090789.pth +- Name: upernet_deit-b16_512x512_160k_ade20k + In Collection: UPerNet + Metadata: + backbone: DeiT-B + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 96.25 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 7.75 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.36 + mIoU(ms+flip): 47.16 + Config: configs/vit/upernet_deit-b16_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_512x512_160k_ade20k/upernet_deit-b16_512x512_160k_ade20k_20210621_180100-828705d7.pth +- Name: upernet_deit-b16_mln_512x512_160k_ade20k + In Collection: UPerNet + Metadata: + backbone: DeiT-B + MLN + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 128.53 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.21 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.46 + mIoU(ms+flip): 47.16 + Config: configs/vit/upernet_deit-b16_mln_512x512_160k_ade20k.py + Weights: 
https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_mln_512x512_160k_ade20k/upernet_deit-b16_mln_512x512_160k_ade20k_20210621_191949-4e1450f3.pth +- Name: upernet_deit-b16_ln_mln_512x512_160k_ade20k + In Collection: UPerNet + Metadata: + backbone: DeiT-B + LN + MLN + crop size: (512,512) + lr schd: 160000 + inference time (ms/im): + - value: 129.03 + hardware: V100 + backend: PyTorch + batch size: 1 + mode: FP32 + resolution: (512,512) + Training Memory (GB): 9.21 + Results: + - Task: Semantic Segmentation + Dataset: ADE20K + Metrics: + mIoU: 45.37 + mIoU(ms+flip): 47.23 + Config: configs/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k.py + Weights: https://download.openmmlab.com/mmsegmentation/v0.5/vit/upernet_deit-b16_ln_mln_512x512_160k_ade20k/upernet_deit-b16_ln_mln_512x512_160k_ade20k_20210623_153535-8a959c14.pth diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/demo/MMSegmentation_Tutorial.ipynb b/prediction/image/mx15hdi/Detect/mmsegmentation/demo/MMSegmentation_Tutorial.ipynb new file mode 100644 index 0000000..a75ec5c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/demo/MMSegmentation_Tutorial.ipynb @@ -0,0 +1,649 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "view-in-github" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FVmnaxFJvsb8" + }, + "source": [ + "# MMSegmentation Tutorial\n", + "Welcome to MMSegmentation! \n", + "\n", + "In this tutorial, we demonstrate\n", + "* How to do inference with an MMSeg trained weight\n", + "* How to train on your own dataset and visualize the results. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QS8YHrEhbpas" + }, + "source": [ + "## Install MMSegmentation\n", + "This step may take several minutes. \n", + "\n", + "We use PyTorch 1.10 and CUDA 11.1 for this tutorial. You may install other versions by changing the version number in the pip install command. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "UWyLrLYaNEaL", + "outputId": "32a47fe3-f10d-47a1-f6b9-b7c235abdab1" + }, + "outputs": [], + "source": [ + "# Check nvcc version\n", + "!nvcc -V\n", + "# Check GCC version\n", + "!gcc --version" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Ki3WUBjKbutg", + "outputId": "14bd14b0-4d8c-4fa9-e3f9-da35c0efc0d5" + }, + "outputs": [], + "source": [ + "# Install PyTorch\n", + "!conda install pytorch=1.10.0 torchvision cudatoolkit=11.1 -c pytorch\n", + "# Install MMCV\n", + "!pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.10/index.html" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "nR-hHRvbNJJZ", + "outputId": "10c3b131-d4db-458c-fc10-b94b1c6ed546" + }, + "outputs": [], + "source": [ + "!rm -rf mmsegmentation\n", + "!git clone https://github.com/open-mmlab/mmsegmentation.git \n", + "%cd mmsegmentation\n", + "!pip install -e ."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "mAE_h7XhPT7d", + "outputId": "83bf0f8e-fc69-40b1-f9fe-0025724a217c" + }, + "outputs": [], + "source": [ + "# Check Pytorch installation\n", + "import torch, torchvision\n", + "print(torch.__version__, torch.cuda.is_available())\n", + "\n", + "# Check MMSegmentation installation\n", + "import mmseg\n", + "print(mmseg.__version__)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eUcuC3dUv32I" + }, + "source": [ + "## Run Inference with MMSeg trained weight" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "2hd41IGaiNet", + "outputId": "b7b2aafc-edf2-43e4-ea43-0b5dd0aa4b4a" + }, + "outputs": [], + "source": [ + "!mkdir checkpoints\n", + "!wget https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth -P checkpoints" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "H8Fxg8i-wHJE" + }, + "outputs": [], + "source": [ + "from mmseg.apis import inference_segmentor, init_segmentor, show_result_pyplot\n", + "from mmseg.core.evaluation import get_palette" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "umk8sJ0Xuace" + }, + "outputs": [], + "source": [ + "config_file = 'configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py'\n", + "checkpoint_file = 'checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "nWlQFuTgudxu", + "outputId": "5e45f4f6-5bcf-4d04-bb9c-0428ee84a576" + }, + "outputs": [], + "source": [ + "# build the model from a config file and a checkpoint file\n", + "model = init_segmentor(config_file, checkpoint_file, device='cuda:0')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "izFv6pSRujk9" + }, + "outputs": [], + "source": [ + "# test a single image\n", + "img = 'demo/demo.png'\n", + "result = inference_segmentor(model, img)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 504 + }, + "id": "bDcs9udgunQK", + "outputId": "7c55f713-4085-47fd-fa06-720a321d0795" + }, + "outputs": [], + "source": [ + "# show the results\n", + "show_result_pyplot(model, img, result, get_palette('cityscapes'))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ta51clKX4cwM" + }, + "source": [ + "## Train a semantic segmentation model on a new dataset\n", + "\n", + "To train on a customized dataset, the following steps are necessary. \n", + "1. Add a new dataset class. \n", + "2. Create a config file accordingly. \n", + "3. Perform training and evaluation. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AcZg6x_K5Zs3" + }, + "source": [ + "### Add a new dataset\n", + "\n", + "Datasets in MMSegmentation require image and semantic segmentation maps to be placed in folders with the same prefix. To support a new dataset, we may need to modify the original file structure. \n", + "\n", + "In this tutorial, we give an example of converting the dataset. 
You may refer to [docs](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/tutorials/customize_datasets.md#customize-datasets-by-reorganizing-data) for details about dataset reorganization. \n", + "\n", + "We use [Stanford Background Dataset](http://dags.stanford.edu/projects/scenedataset.html) as an example. The dataset contains 715 images chosen from existing public datasets [LabelMe](http://labelme.csail.mit.edu), [MSRC](http://research.microsoft.com/en-us/projects/objectclassrecognition), [PASCAL VOC](http://pascallin.ecs.soton.ac.uk/challenges/VOC) and [Geometric Context](http://www.cs.illinois.edu/homes/dhoiem/). Images from these datasets are mainly outdoor scenes, each approximately 320-by-240 pixels in size. \n", + "In this tutorial, we use the region annotations as labels. There are 8 classes in total, i.e. sky, tree, road, grass, water, building, mountain, and foreground object. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TFIt7MHq5Wls", + "outputId": "74a126e4-c8a4-4d2f-a910-b58b71843a23" + }, + "outputs": [], + "source": [ + "# download and unzip\n", + "!wget http://dags.stanford.edu/data/iccv09Data.tar.gz -O stanford_background.tar.gz\n", + "!tar xf stanford_background.tar.gz" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 377 + }, + "id": "78LIci7F9WWI", + "outputId": "c432ddac-5a50-47b1-daac-5a26b07afea2" + }, + "outputs": [], + "source": [ + "# Let's take a look at the dataset\n", + "import mmcv\n", + "import matplotlib.pyplot as plt\n", + "\n", + "img = mmcv.imread('iccv09Data/images/6000124.jpg')\n", + "plt.figure(figsize=(8, 6))\n", + "plt.imshow(mmcv.bgr2rgb(img))\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L5mNQuc2GsVE" + }, + "source": [ + "We need to convert each annotation into a semantic segmentation map saved as an image."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WnGZfribFHCx" + }, + "outputs": [], + "source": [ + "import os.path as osp\n", + "import numpy as np\n", + "from PIL import Image\n", + "# convert dataset annotations to semantic segmentation maps\n", + "data_root = 'iccv09Data'\n", + "img_dir = 'images'\n", + "ann_dir = 'labels'\n", + "# define classes and palette for better visualization\n", + "classes = ('sky', 'tree', 'road', 'grass', 'water', 'bldg', 'mntn', 'fg obj')\n", + "palette = [[128, 128, 128], [129, 127, 38], [120, 69, 125], [53, 125, 34], \n", + " [0, 11, 123], [118, 20, 12], [122, 81, 25], [241, 134, 51]]\n", + "for file in mmcv.scandir(osp.join(data_root, ann_dir), suffix='.regions.txt'):\n", + " seg_map = np.loadtxt(osp.join(data_root, ann_dir, file)).astype(np.uint8)\n", + " seg_img = Image.fromarray(seg_map).convert('P')\n", + " seg_img.putpalette(np.array(palette, dtype=np.uint8))\n", + " seg_img.save(osp.join(data_root, ann_dir, file.replace('.regions.txt', \n", + " '.png')))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 377 + }, + "id": "5MCSS9ABfSks", + "outputId": "92b9bafc-589e-48fc-c9e9-476f125d6522" + }, + "outputs": [], + "source": [ + "# Let's take a look at the segmentation map we got\n", + "import matplotlib.patches as mpatches\n", + "img = Image.open('iccv09Data/labels/6000124.png')\n", + "plt.figure(figsize=(8, 6))\n", + "im = plt.imshow(np.array(img.convert('RGB')))\n", + "\n", + "# create a patch (proxy artist) for every color \n", + "patches = [mpatches.Patch(color=np.array(palette[i])/255., \n", + " label=classes[i]) for i in range(8)]\n", + "# put those patches as legend handles into the legend\n", + "plt.legend(handles=patches, bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., \n", + " fontsize='large')\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WbeLYCp2k5hl" + }, + "outputs": [], + "source": [ + "# split train/val set (first 4/5 as train, last 1/5 as val)\n", + "split_dir = 'splits'\n", + "mmcv.mkdir_or_exist(osp.join(data_root, split_dir))\n", + "filename_list = [osp.splitext(filename)[0] for filename in mmcv.scandir(\n", + " osp.join(data_root, ann_dir), suffix='.png')]\n", + "with open(osp.join(data_root, split_dir, 'train.txt'), 'w') as f:\n", + " # select first 4/5 as train set\n", + " train_length = int(len(filename_list)*4/5)\n", + " f.writelines(line + '\\n' for line in filename_list[:train_length])\n", + "with open(osp.join(data_root, split_dir, 'val.txt'), 'w') as f:\n", + " # select last 1/5 as val set\n", + " f.writelines(line + '\\n' for line in filename_list[train_length:])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HchvmGYB_rrO" + }, + "source": [ + "After downloading the data, we need to implement the `load_annotations` function in the new dataset class `StanfordBackgroundDataset`."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LbsWOw62_o-X" + }, + "outputs": [], + "source": [ + "from mmseg.datasets.builder import DATASETS\n", + "from mmseg.datasets.custom import CustomDataset\n", + "\n", + "@DATASETS.register_module()\n", + "class StanfordBackgroundDataset(CustomDataset):\n", + " CLASSES = classes\n", + " PALETTE = palette\n", + " def __init__(self, split, **kwargs):\n", + " super().__init__(img_suffix='.jpg', seg_map_suffix='.png', \n", + " split=split, **kwargs)\n", + " assert osp.exists(self.img_dir) and self.split is not None\n", + "\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yUVtmn3Iq3WA" + }, + "source": [ + "### Create a config file\n", + "In the next step, we need to modify the config for training. To accelerate the process, we fine-tune the model from pre-trained weights." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Wwnj9tRzqX_A" + }, + "outputs": [], + "source": [ + "from mmcv import Config\n", + "cfg = Config.fromfile('configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1y2oV5w97jQo" + }, + "source": [ + "Since the given config is used to train PSPNet on the Cityscapes dataset, we need to modify it accordingly for our new dataset. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "eyKnYC1Z7iCV", + "outputId": "6195217b-187f-4675-994b-ba90d8bb3078" + }, + "outputs": [], + "source": [ + "from mmseg.apis import set_random_seed\n", + "\n", + "# Since we use only one GPU, BN is used instead of SyncBN\n", + "cfg.norm_cfg = dict(type='BN', requires_grad=True)\n", + "cfg.model.backbone.norm_cfg = cfg.norm_cfg\n", + "cfg.model.decode_head.norm_cfg = cfg.norm_cfg\n", + "cfg.model.auxiliary_head.norm_cfg = cfg.norm_cfg\n", + "# modify num classes of the model in decode/auxiliary head\n", + "cfg.model.decode_head.num_classes = 8\n", + "cfg.model.auxiliary_head.num_classes = 8\n", + "\n", + "# Modify dataset type and path\n", + "cfg.dataset_type = 'StanfordBackgroundDataset'\n", + "cfg.data_root = data_root\n", + "\n", + "cfg.data.samples_per_gpu = 8\n", + "cfg.data.workers_per_gpu = 8\n", + "\n", + "cfg.img_norm_cfg = dict(\n", + " mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\n", + "cfg.crop_size = (256, 256)\n", + "cfg.train_pipeline = [\n", + " dict(type='LoadImageFromFile'),\n", + " dict(type='LoadAnnotations'),\n", + " dict(type='Resize', img_scale=(320, 240), ratio_range=(0.5, 2.0)),\n", + " dict(type='RandomCrop', crop_size=cfg.crop_size, cat_max_ratio=0.75),\n", + " dict(type='RandomFlip', flip_ratio=0.5),\n", + " dict(type='PhotoMetricDistortion'),\n", + " dict(type='Normalize', **cfg.img_norm_cfg),\n", + " dict(type='Pad', size=cfg.crop_size, pad_val=0, seg_pad_val=255),\n", + " dict(type='DefaultFormatBundle'),\n", + " dict(type='Collect', keys=['img', 'gt_semantic_seg']),\n", + "]\n", + "\n", + "cfg.test_pipeline = [\n", + " dict(type='LoadImageFromFile'),\n", + " dict(\n", + " type='MultiScaleFlipAug',\n", + " img_scale=(320, 240),\n", + " # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],\n", + " flip=False,\n", + " transforms=[\n", + " dict(type='Resize', keep_ratio=True),\n", + " dict(type='RandomFlip'),\n", + " dict(type='Normalize', **cfg.img_norm_cfg),\n", + " dict(type='ImageToTensor', keys=['img']),\n", + " dict(type='Collect', 
keys=['img']),\n", + " ])\n", + "]\n", + "\n", + "\n", + "cfg.data.train.type = cfg.dataset_type\n", + "cfg.data.train.data_root = cfg.data_root\n", + "cfg.data.train.img_dir = img_dir\n", + "cfg.data.train.ann_dir = ann_dir\n", + "cfg.data.train.pipeline = cfg.train_pipeline\n", + "cfg.data.train.split = 'splits/train.txt'\n", + "\n", + "cfg.data.val.type = cfg.dataset_type\n", + "cfg.data.val.data_root = cfg.data_root\n", + "cfg.data.val.img_dir = img_dir\n", + "cfg.data.val.ann_dir = ann_dir\n", + "cfg.data.val.pipeline = cfg.test_pipeline\n", + "cfg.data.val.split = 'splits/val.txt'\n", + "\n", + "cfg.data.test.type = cfg.dataset_type\n", + "cfg.data.test.data_root = cfg.data_root\n", + "cfg.data.test.img_dir = img_dir\n", + "cfg.data.test.ann_dir = ann_dir\n", + "cfg.data.test.pipeline = cfg.test_pipeline\n", + "cfg.data.test.split = 'splits/val.txt'\n", + "\n", + "# Fine-tune from the pre-trained PSPNet checkpoint downloaded above so we\n", + "# do not have to train from scratch\n", + "cfg.load_from = 'checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'\n", + "\n", + "# Set up working dir to save files and logs.\n", + "cfg.work_dir = './work_dirs/tutorial'\n", + "\n", + "cfg.runner.max_iters = 200\n", + "cfg.log_config.interval = 10\n", + "cfg.evaluation.interval = 200\n", + "cfg.checkpoint_config.interval = 200\n", + "\n", + "# Set seed to facilitate reproducing the result\n", + "cfg.seed = 0\n", + "set_random_seed(0, deterministic=False)\n", + "cfg.gpu_ids = range(1)\n", + "\n", + "# Let's have a look at the final config used for training\n", + "print(f'Config:\\n{cfg.pretty_text}')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QWuH14LYF2gQ" + }, + "source": [ + "### Training and Evaluation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "jYKoSfdMF12B", + "outputId": "422219ca-d7a5-4890-f09f-88c959942e64" + }, + "outputs": [], + "source": [ + "from mmseg.datasets import build_dataset\n", + "from mmseg.models import build_segmentor\n", + "from mmseg.apis import train_segmentor\n", + "\n", + "\n", + "# Build the dataset\n", + "datasets = [build_dataset(cfg.data.train)]\n", + "\n", + "# Build the segmentor\n", + "model = build_segmentor(cfg.model)\n", + "# Add an attribute for visualization convenience\n", + "model.CLASSES = datasets[0].CLASSES\n", + "\n", + "# Create work_dir\n", + "mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))\n", + "train_segmentor(model, datasets, cfg, distributed=False, validate=True, \n", + " meta=dict())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DEkWOP-NMbc_" + }, + "source": [ + "Inference with the trained model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 645 + }, + "id": "ekG__UfaH_OU", + "outputId": "1437419c-869a-4902-df86-d4f6f8b2597a" + }, + "outputs": [], + "source": [ + "img = mmcv.imread('iccv09Data/images/6000124.jpg')\n", + "\n", + "model.cfg = cfg\n", + "result = inference_segmentor(model, img)\n", + "plt.figure(figsize=(8, 6))\n", + "show_result_pyplot(model, img, result, palette)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "include_colab_link": true, + "name": "MMSegmentation Tutorial.ipynb", + "provenance": [] + }, + "kernelspec": { + 
"display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "pycharm": { + "stem_cell": { + "cell_type": "raw", + "metadata": { + "collapsed": false + }, + "source": [] + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/demo/demo.png b/prediction/image/mx15hdi/Detect/mmsegmentation/demo/demo.png new file mode 100644 index 0000000..1e82d7a Binary files /dev/null and b/prediction/image/mx15hdi/Detect/mmsegmentation/demo/demo.png differ diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/demo/image_demo.py b/prediction/image/mx15hdi/Detect/mmsegmentation/demo/image_demo.py new file mode 100644 index 0000000..87d6d6c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/demo/image_demo.py @@ -0,0 +1,42 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from argparse import ArgumentParser + +from mmseg.apis import inference_segmentor, init_segmentor, show_result_pyplot +from mmseg.core.evaluation import get_palette + + +def main(): + parser = ArgumentParser() + parser.add_argument('img', help='Image file') + parser.add_argument('config', help='Config file') + parser.add_argument('checkpoint', help='Checkpoint file') + parser.add_argument('--out-file', default=None, help='Path to output file') + parser.add_argument( + '--device', default='cuda:0', help='Device used for inference') + parser.add_argument( + '--palette', + default='cityscapes', + help='Color palette used for segmentation map') + parser.add_argument( + '--opacity', + type=float, + default=0.5, + help='Opacity of painted segmentation map. 
In (0, 1] range.') + args = parser.parse_args() + + # build the model from a config file and a checkpoint file + model = init_segmentor(args.config, args.checkpoint, device=args.device) + # test a single image + result = inference_segmentor(model, args.img) + # show the results + show_result_pyplot( + model, + args.img, + result, + get_palette(args.palette), + opacity=args.opacity, + out_file=args.out_file) + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/demo/inference_demo.ipynb b/prediction/image/mx15hdi/Detect/mmsegmentation/demo/inference_demo.ipynb new file mode 100644 index 0000000..66a447b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/demo/inference_demo.ipynb @@ -0,0 +1,110 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!mkdir ../checkpoints\n", + "!wget https://download.openmmlab.com/mmsegmentation/v0.5/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth -P ../checkpoints" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "is_executing": true + } + }, + "outputs": [], + "source": [ + "from mmseg.apis import init_segmentor, inference_segmentor, show_result_pyplot\n", + "from mmseg.core.evaluation import get_palette" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "is_executing": true + } + }, + "outputs": [], + "source": [ + "config_file = '../configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py'\n", + "checkpoint_file = '../checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# build the model from a config file and a checkpoint file\n", + "model = init_segmentor(config_file, checkpoint_file, device='cuda:0')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# test a single image\n", + "img = 'demo.png'\n", + "result = inference_segmentor(model, img)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# show the results\n", + "show_result_pyplot(model, img, result, get_palette('cityscapes'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + }, + "pycharm": { + "stem_cell": { + "cell_type": "raw", + "metadata": { + "collapsed": false + }, + "source": [] + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/demo/video_demo.py b/prediction/image/mx15hdi/Detect/mmsegmentation/demo/video_demo.py new file mode 100644 index 0000000..eb4fd69 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/demo/video_demo.py @@ -0,0 +1,112 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
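+# Example usage (the video and output paths below are illustrative, not
+# files shipped with the repo):
+#   python demo/video_demo.py my_video.mp4 \
+#       configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \
+#       checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth \
+#       --show --output-file result.avi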
+from argparse import ArgumentParser + +import cv2 + +from mmseg.apis import inference_segmentor, init_segmentor +from mmseg.core.evaluation import get_palette + + +def main(): + parser = ArgumentParser() + parser.add_argument('video', help='Video file or webcam id') + parser.add_argument('config', help='Config file') + parser.add_argument('checkpoint', help='Checkpoint file') + parser.add_argument( + '--device', default='cuda:0', help='Device used for inference') + parser.add_argument( + '--palette', + default='cityscapes', + help='Color palette used for segmentation map') + parser.add_argument( + '--show', action='store_true', help='Whether to show draw result') + parser.add_argument( + '--show-wait-time', default=1, type=int, help='Wait time after imshow') + parser.add_argument( + '--output-file', default=None, type=str, help='Output video file path') + parser.add_argument( + '--output-fourcc', + default='MJPG', + type=str, + help='Fourcc of the output video') + parser.add_argument( + '--output-fps', default=-1, type=int, help='FPS of the output video') + parser.add_argument( + '--output-height', + default=-1, + type=int, + help='Frame height of the output video') + parser.add_argument( + '--output-width', + default=-1, + type=int, + help='Frame width of the output video') + parser.add_argument( + '--opacity', + type=float, + default=0.5, + help='Opacity of painted segmentation map. In (0, 1] range.') + args = parser.parse_args() + + assert args.show or args.output_file, \ + 'At least one output should be enabled.' + + # build the model from a config file and a checkpoint file + model = init_segmentor(args.config, args.checkpoint, device=args.device) + + # build input video + cap = cv2.VideoCapture(args.video) + assert (cap.isOpened()) + input_height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT) + input_width = cap.get(cv2.CAP_PROP_FRAME_WIDTH) + input_fps = cap.get(cv2.CAP_PROP_FPS) + + # init output video + writer = None + output_height = None + output_width = None + if args.output_file is not None: + fourcc = cv2.VideoWriter_fourcc(*args.output_fourcc) + output_fps = args.output_fps if args.output_fps > 0 else input_fps + output_height = args.output_height if args.output_height > 0 else int( + input_height) + output_width = args.output_width if args.output_width > 0 else int( + input_width) + writer = cv2.VideoWriter(args.output_file, fourcc, output_fps, + (output_width, output_height), True) + + # start looping + try: + while True: + flag, frame = cap.read() + if not flag: + break + + # test a single image + result = inference_segmentor(model, frame) + + # blend raw image and prediction + draw_img = model.show_result( + frame, + result, + palette=get_palette(args.palette), + show=False, + opacity=args.opacity) + + if args.show: + cv2.imshow('video_demo', draw_img) + cv2.waitKey(args.show_wait_time) + if writer: + if draw_img.shape[0] != output_height or draw_img.shape[ + 1] != output_width: + draw_img = cv2.resize(draw_img, + (output_width, output_height)) + writer.write(draw_img) + finally: + if writer: + writer.release() + cap.release() + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docker/Dockerfile b/prediction/image/mx15hdi/Detect/mmsegmentation/docker/Dockerfile new file mode 100644 index 0000000..90febac --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docker/Dockerfile @@ -0,0 +1,32 @@ +ARG PYTORCH="1.11.0" +ARG CUDA="11.3" +ARG CUDNN="8" + +FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel + +ENV 
TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0+PTX" +ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all" +ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" + +# To fix GPG key error when running apt-get update +RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub +RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub + +RUN apt-get update && apt-get install -y git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 libgl1-mesa-glx \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +RUN conda clean --all + +# Install MMCV +ARG PYTORCH +ARG CUDA +ARG MMCV +RUN ["/bin/bash", "-c", "pip install --no-cache-dir mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu${CUDA//./}/torch${PYTORCH}/index.html"] + +# Install MMSegmentation +RUN git clone https://github.com/open-mmlab/mmsegmentation.git /mmsegmentation +WORKDIR /mmsegmentation +ENV FORCE_CUDA="1" +RUN pip install -r requirements.txt +RUN pip install --no-cache-dir -e . diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docker/serve/Dockerfile b/prediction/image/mx15hdi/Detect/mmsegmentation/docker/serve/Dockerfile new file mode 100644 index 0000000..8a71902 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docker/serve/Dockerfile @@ -0,0 +1,49 @@ +ARG PYTORCH="1.11.0" +ARG CUDA="11.3" +ARG CUDNN="8" +FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel + +ARG MMCV="1.5.0" +ARG MMSEG="0.25.0" + +ENV PYTHONUNBUFFERED TRUE + +RUN apt-get update && \ + DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \ + ca-certificates \ + g++ \ + openjdk-11-jre-headless \ + # MMDet Requirements + ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 \ + && rm -rf /var/lib/apt/lists/* + +ENV PATH="/opt/conda/bin:$PATH" +RUN export FORCE_CUDA=1 + +# TORCHSEVER +RUN pip install torchserve torch-model-archiver + +# MMLAB +ARG PYTORCH +ARG CUDA +RUN ["/bin/bash", "-c", "pip install mmcv-full==${MMCV} -f https://download.openmmlab.com/mmcv/dist/cu${CUDA//./}/torch${PYTORCH}/index.html"] +RUN pip install mmsegmentation==${MMSEG} + +RUN useradd -m model-server \ + && mkdir -p /home/model-server/tmp + +COPY entrypoint.sh /usr/local/bin/entrypoint.sh + +RUN chmod +x /usr/local/bin/entrypoint.sh \ + && chown -R model-server /home/model-server + +COPY config.properties /home/model-server/config.properties +RUN mkdir /home/model-server/model-store && chown -R model-server /home/model-server/model-store + +EXPOSE 8080 8081 8082 + +USER model-server +WORKDIR /home/model-server +ENV TEMP=/home/model-server/tmp +ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] +CMD ["serve"] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docker/serve/config.properties b/prediction/image/mx15hdi/Detect/mmsegmentation/docker/serve/config.properties new file mode 100644 index 0000000..efb9c47 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docker/serve/config.properties @@ -0,0 +1,5 @@ +inference_address=http://0.0.0.0:8080 +management_address=http://0.0.0.0:8081 +metrics_address=http://0.0.0.0:8082 +model_store=/home/model-server/model-store +load_models=all diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docker/serve/entrypoint.sh b/prediction/image/mx15hdi/Detect/mmsegmentation/docker/serve/entrypoint.sh new file mode 100644 index 0000000..41ba00b --- /dev/null +++ 
b/prediction/image/mx15hdi/Detect/mmsegmentation/docker/serve/entrypoint.sh @@ -0,0 +1,12 @@ +#!/bin/bash +set -e + +if [[ "$1" = "serve" ]]; then + shift 1 + torchserve --start --ts-config /home/model-server/config.properties +else + eval "$@" +fi + +# prevent docker exit +tail -f /dev/null diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/Makefile b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/Makefile new file mode 100644 index 0000000..d4bb2cb --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/_static/css/readthedocs.css b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/_static/css/readthedocs.css new file mode 100644 index 0000000..2e38d08 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/_static/css/readthedocs.css @@ -0,0 +1,6 @@ +.header-logo { + background-image: url("../images/mmsegmentation.png"); + background-size: 201px 40px; + height: 40px; + width: 201px; +} diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/_static/images/mmsegmentation.png b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/_static/images/mmsegmentation.png new file mode 100644 index 0000000..009083a Binary files /dev/null and b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/_static/images/mmsegmentation.png differ diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/api.rst b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/api.rst new file mode 100644 index 0000000..8285841 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/api.rst @@ -0,0 +1,58 @@ +mmseg.apis +-------------- +.. automodule:: mmseg.apis + :members: + +mmseg.core +-------------- + +seg +^^^^^^^^^^ +.. automodule:: mmseg.core.seg + :members: + +evaluation +^^^^^^^^^^ +.. automodule:: mmseg.core.evaluation + :members: + +utils +^^^^^^^^^^ +.. automodule:: mmseg.core.utils + :members: + +mmseg.datasets +-------------- + +datasets +^^^^^^^^^^ +.. automodule:: mmseg.datasets + :members: + +pipelines +^^^^^^^^^^ +.. automodule:: mmseg.datasets.pipelines + :members: + +mmseg.models +-------------- + +segmentors +^^^^^^^^^^ +.. automodule:: mmseg.models.segmentors + :members: + +backbones +^^^^^^^^^^ +.. automodule:: mmseg.models.backbones + :members: + +decode_heads +^^^^^^^^^^^^ +.. automodule:: mmseg.models.decode_heads + :members: + +losses +^^^^^^^^^^ +.. 
automodule:: mmseg.models.losses + :members: diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/changelog.md b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/changelog.md new file mode 100644 index 0000000..2ff2398 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/changelog.md @@ -0,0 +1,759 @@ +## Changelog + +### V0.25.0 (6/2/2022) + +**Highlights** + +- Support PyTorch backend on MLU ([1515](https://github.com/open-mmlab/mmsegmentation/pull/1515)) + +**Bug Fixes** + +- Fix the error of BCE loss when batch size is 1 ([1629](https://github.com/open-mmlab/mmsegmentation/pull/1629)) +- Fix bug of `resize` function when `align_corners` is True ([1592](https://github.com/open-mmlab/mmsegmentation/pull/1592)) +- Fix Dockerfile to run demo script in docker container ([1568](https://github.com/open-mmlab/mmsegmentation/pull/1568)) +- Correct inference_demo.ipynb path ([1576](https://github.com/open-mmlab/mmsegmentation/pull/1576)) +- Fix the `build_segmentor` in colab demo ([1551](https://github.com/open-mmlab/mmsegmentation/pull/1551)) +- Fix md2yml script ([1633](https://github.com/open-mmlab/mmsegmentation/pull/1633), [1555](https://github.com/open-mmlab/mmsegmentation/pull/1555)) +- Fix main line link in MAE README.md ([1556](https://github.com/open-mmlab/mmsegmentation/pull/1556)) +- Fix FastFCN `crop_size` in README.md ([1597](https://github.com/open-mmlab/mmsegmentation/pull/1597)) +- Upgrade pip when testing on the Windows platform ([1610](https://github.com/open-mmlab/mmsegmentation/pull/1610)) + +**Improvements** + +- Delete DS_Store file ([1549](https://github.com/open-mmlab/mmsegmentation/pull/1549)) +- Revise owners.yml ([1621](https://github.com/open-mmlab/mmsegmentation/pull/1621), [1534](https://github.com/open-mmlab/mmsegmentation/pull/1543)) + +**Documentation** + +- Rewrite the installation guidance ([1630](https://github.com/open-mmlab/mmsegmentation/pull/1630)) +- Format readme ([1635](https://github.com/open-mmlab/mmsegmentation/pull/1635)) +- Replace markdownlint with mdformat to avoid Ruby installation ([1591](https://github.com/open-mmlab/mmsegmentation/pull/1591)) +- Add explanation and usage instructions for data configuration ([1548](https://github.com/open-mmlab/mmsegmentation/pull/1548)) +- Configure Myst-parser to parse anchor tags ([1589](https://github.com/open-mmlab/mmsegmentation/pull/1589)) +- Update QR code and link for QQ group ([1598](https://github.com/open-mmlab/mmsegmentation/pull/1598), [1574](https://github.com/open-mmlab/mmsegmentation/pull/1574)) + +**Contributors** + +- @atinfinity made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1568 +- @DoubleChuang made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1576 +- @alpha-baymax made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1515 +- @274869388 made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1629 + +### V0.24.1 (5/1/2022) + +**Bug Fixes** + +- Fix `LayerDecayOptimizerConstructor` for MAE training ([#1539](https://github.com/open-mmlab/mmsegmentation/pull/1539), [#1540](https://github.com/open-mmlab/mmsegmentation/pull/1540)) + +### V0.24.0 (4/29/2022) + +**Highlights** + +- Support MAE: Masked Autoencoders Are Scalable Vision Learners +- Support ResNet strikes back + +**New Features** + +- Support MAE: Masked Autoencoders Are Scalable Vision Learners ([1307](https://github.com/open-mmlab/mmsegmentation/pull/1307), 
[1523](https://github.com/open-mmlab/mmsegmentation/pull/1523)) +- Support ResNet strikes back ([1390](https://github.com/open-mmlab/mmsegmentation/pull/1390)) +- Support extra dataloader settings in configs ([1435](https://github.com/open-mmlab/mmsegmentation/pull/1435)) + +**Bug Fixes** + +- Fix input previous results for the last cascade_decode_head ([#1450](https://github.com/open-mmlab/mmsegmentation/pull/1450)) +- Fix validation loss logging ([#1494](https://github.com/open-mmlab/mmsegmentation/pull/1494)) +- Fix the bug in binary_cross_entropy ([1527](https://github.com/open-mmlab/mmsegmentation/pull/1527)) +- Support single channel prediction for Binary Cross Entropy Loss ([#1454](https://github.com/open-mmlab/mmsegmentation/pull/1454)) +- Fix potential bugs in accuracy.py ([1496](https://github.com/open-mmlab/mmsegmentation/pull/1496)) +- Avoid converting label ids twice by label map during evaluation ([1417](https://github.com/open-mmlab/mmsegmentation/pull/1417)) +- Fix bug about label_map ([1445](https://github.com/open-mmlab/mmsegmentation/pull/1445)) +- Fix image save path bug on Windows ([1423](https://github.com/open-mmlab/mmsegmentation/pull/1423)) +- Fix MMSegmentation Colab demo ([1501](https://github.com/open-mmlab/mmsegmentation/pull/1501), [1452](https://github.com/open-mmlab/mmsegmentation/pull/1452)) +- Migrate Azure blob for BEiT checkpoints ([1503](https://github.com/open-mmlab/mmsegmentation/pull/1503)) +- Fix bug in `tools/analyse_logs.py` caused by wrong plot_iter in some cases ([1428](https://github.com/open-mmlab/mmsegmentation/pull/1428)) + +**Improvements** + +- Merge BEiT and ConvNext's LR decay optimizer constructors ([#1438](https://github.com/open-mmlab/mmsegmentation/pull/1438)) +- Register optimizer constructor with mmseg ([#1456](https://github.com/open-mmlab/mmsegmentation/pull/1456)) +- Refactor transformer encode layer in ViT and BEiT backbone ([#1481](https://github.com/open-mmlab/mmsegmentation/pull/1481)) +- Add `build_pos_embed` and `build_layers` for BEiT ([1517](https://github.com/open-mmlab/mmsegmentation/pull/1517)) +- Add `with_cp` to mit and vit ([1431](https://github.com/open-mmlab/mmsegmentation/pull/1431)) +- Fix inconsistent dtype of `seg_label` in stdc decode ([1463](https://github.com/open-mmlab/mmsegmentation/pull/1463)) +- Delete random seed for training in `dist_train.sh` ([1519](https://github.com/open-mmlab/mmsegmentation/pull/1519)) +- Revise high `workers_per_gpus` in config file ([#1506](https://github.com/open-mmlab/mmsegmentation/pull/1506)) +- Add GPG keys and del mmcv version in Dockerfile ([1534](https://github.com/open-mmlab/mmsegmentation/pull/1534)) +- Update checkpoint for model in deeplabv3plus ([#1487](https://github.com/open-mmlab/mmsegmentation/pull/1487)) +- Add `DistSamplerSeedHook` to set epoch number to dataloader when runner is `EpochBasedRunner` ([1449](https://github.com/open-mmlab/mmsegmentation/pull/1449)) +- Provide URLs of Swin Transformer pretrained models ([1389](https://github.com/open-mmlab/mmsegmentation/pull/1389)) +- Update Dockerfiles in the docker directory and `get_started.md` to the latest stable versions of Python, PyTorch and MMCV ([1446](https://github.com/open-mmlab/mmsegmentation/pull/1446)) + +**Documentation** + +- Add a clearer statement of CPU training/inference ([1518](https://github.com/open-mmlab/mmsegmentation/pull/1518)) + +**Contributors** + +- @jiangyitong made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1431 +- @kahkeng made their 
first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1447 +- @Nourollah made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1446 +- @androbaza made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1452 +- @Yzichen made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1445 +- @whu-pzhang made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1423 +- @panfeng-hover made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1417 +- @Johnson-Wang made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1496 +- @jere357 made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1460 +- @mfernezir made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1494 +- @donglixp made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1503 +- @YuanLiuuuuuu made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1307 +- @Dawn-bin made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1527 + +### V0.23.0 (4/1/2022) + +**Highlights** + +- Support BEiT: BERT Pre-Training of Image Transformers +- Support K-Net: Towards Unified Image Segmentation +- Add `avg_non_ignore` of CELoss to support average loss over non-ignored elements +- Support dataset initialization with file client + +**New Features** + +- Support BEiT: BERT Pre-Training of Image Transformers ([#1404](https://github.com/open-mmlab/mmsegmentation/pull/1404)) +- Support K-Net: Towards Unified Image Segmentation ([#1289](https://github.com/open-mmlab/mmsegmentation/pull/1289)) +- Support dataset initialization with file client ([#1402](https://github.com/open-mmlab/mmsegmentation/pull/1402)) +- Add class name function for STARE datasets ([#1376](https://github.com/open-mmlab/mmsegmentation/pull/1376)) +- Support different seeds on different ranks during distributed training ([#1362](https://github.com/open-mmlab/mmsegmentation/pull/1362)) +- Add `nlc2nchw2nlc` and `nchw2nlc2nchw` to simplify operations on tensors with different dimension layouts ([#1249](https://github.com/open-mmlab/mmsegmentation/pull/1249)) + +**Improvements** + +- Synchronize random seed for distributed sampler ([#1411](https://github.com/open-mmlab/mmsegmentation/pull/1411)) +- Add script and documentation for multi-machine distributed training ([#1383](https://github.com/open-mmlab/mmsegmentation/pull/1383)) + +**Bug Fixes** + +- Add `avg_non_ignore` of CELoss to support average loss over non-ignored elements ([#1409](https://github.com/open-mmlab/mmsegmentation/pull/1409)) +- Fix some wrong URLs of models or logs in `./configs` ([#1336](https://github.com/open-mmlab/mmsegmentation/pull/1433)) +- Add title and color theme arguments to plot function in `tools/confusion_matrix.py` ([#1401](https://github.com/open-mmlab/mmsegmentation/pull/1401)) +- Fix outdated link in Colab demo ([#1392](https://github.com/open-mmlab/mmsegmentation/pull/1392)) +- Fix typos ([#1424](https://github.com/open-mmlab/mmsegmentation/pull/1424), [#1405](https://github.com/open-mmlab/mmsegmentation/pull/1405), [#1371](https://github.com/open-mmlab/mmsegmentation/pull/1371), [#1366](https://github.com/open-mmlab/mmsegmentation/pull/1366), [#1363](https://github.com/open-mmlab/mmsegmentation/pull/1363)) + +**Documentation** + +- Add FAQ document 
([#1420](https://github.com/open-mmlab/mmsegmentation/pull/1420)) +- Fix the config name style description in official docs ([#1414](https://github.com/open-mmlab/mmsegmentation/pull/1414)) + +**Contributors** + +- @kinglintianxia made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1371 +- @CCODING04 made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1376 +- @mob5566 made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1401 +- @xiongnemo made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1392 +- @Xiangxu-0103 made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1405 + +### V0.22.1 (3/9/2022) + +**Bug Fixes** + +- Fix the ZeroDivisionError raised when all pixels in an image are ignored. ([#1336](https://github.com/open-mmlab/mmsegmentation/pull/1336)) + +**Improvements** + +- Provide URLs of STDC, Segmenter and Twins pretrained models ([#1272](https://github.com/open-mmlab/mmsegmentation/pull/1357)) + +### V0.22 (3/04/2022) + +**Highlights** + +- Support ConvNeXt: A ConvNet for the 2020s. Please use the latest MMClassification (0.21.0) to try it out. +- Support the iSAID aerial dataset. +- Officially support inference on Windows. + +**New Features** + +- Support ConvNeXt: A ConvNet for the 2020s. ([#1216](https://github.com/open-mmlab/mmsegmentation/pull/1216)) +- Support the iSAID aerial dataset. ([#1115](https://github.com/open-mmlab/mmsegmentation/pull/1115)) +- Support generating and plotting the confusion matrix. ([#1301](https://github.com/open-mmlab/mmsegmentation/pull/1301)) + +**Improvements** + +- Refactor 4 decoder heads (ASPP, FCN, PSP, UPer): Split forward function into `_forward_feature` and `cls_seg`. ([#1299](https://github.com/open-mmlab/mmsegmentation/pull/1299)) +- Add `min_size` arg in `Resize` to keep the shape after resize bigger than slide window. ([#1318](https://github.com/open-mmlab/mmsegmentation/pull/1318)) +- Revise pre-commit-hooks. ([#1315](https://github.com/open-mmlab/mmsegmentation/pull/1315)) +- Add win-ci. ([#1296](https://github.com/open-mmlab/mmsegmentation/pull/1296)) + +**Bug Fixes** + +- Fix `mlp_ratio` type in Swin Transformer. ([#1274](https://github.com/open-mmlab/mmsegmentation/pull/1274)) +- Fix path errors in `./demo`. ([#1269](https://github.com/open-mmlab/mmsegmentation/pull/1269)) +- Fix bug in conversion of Potsdam. ([#1279](https://github.com/open-mmlab/mmsegmentation/pull/1279)) +- Make accuracy take into account `ignore_index`. ([#1259](https://github.com/open-mmlab/mmsegmentation/pull/1259)) +- Add PyTorch HardSwish assertion in unit test. ([#1294](https://github.com/open-mmlab/mmsegmentation/pull/1294)) +- Fix wrong palette value in Vaihingen. ([#1292](https://github.com/open-mmlab/mmsegmentation/pull/1292)) +- Fix the bug that SETR cannot load pretrained weights. ([#1293](https://github.com/open-mmlab/mmsegmentation/pull/1293)) +- Update correct `In Collection` in the metafile of each config. ([#1239](https://github.com/open-mmlab/mmsegmentation/pull/1239)) +- Upload completed STDC models. ([#1332](https://github.com/open-mmlab/mmsegmentation/pull/1332)) +- Fix dtype Cast error when exporting `DNLHead` to ONNX. 
([#1161](https://github.com/open-mmlab/mmsegmentation/pull/1332)) + +**Contributors** + +- @JiaYanhao made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1269 +- @andife made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1281 +- @SBCV made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1279 +- @HJoonKwon made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1259 +- @Tsingularity made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1290 +- @Waterman0524 made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1115 +- @MeowZheng made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1315 +- @linfangjian01 made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1318 + +### V0.21.1 (2/9/2022) + +**Bug Fixes** + +- Fix typos in docs. ([#1263](https://github.com/open-mmlab/mmsegmentation/pull/1263)) +- Fix repeated logging caused by `setup_multi_processes`. ([#1267](https://github.com/open-mmlab/mmsegmentation/pull/1267)) +- Upgrade isort in pre-commit hook. ([#1270](https://github.com/open-mmlab/mmsegmentation/pull/1270)) + +**Improvements** + +- Use MMCV load_state_dict func in ViT/Swin. ([#1272](https://github.com/open-mmlab/mmsegmentation/pull/1272)) +- Add exception for PointRend to support CPU-only mode. ([#1271](https://github.com/open-mmlab/mmsegmentation/pull/1270)) + +### V0.21 (1/29/2022) + +**Highlights** + +- Officially support CPU training and inference; please use the latest MMCV (1.4.4) to try it out. +- Support Segmenter: Transformer for Semantic Segmentation (ICCV'2021). +- Support ISPRS Potsdam and Vaihingen Datasets. +- Add Mosaic transform and `MultiImageMixDataset` class in `dataset_wrappers`. 
+ +**New Features** + +- Support Segmenter: Transformer for Semantic Segmentation (ICCV'2021) ([#955](https://github.com/open-mmlab/mmsegmentation/pull/955)) +- Support ISPRS Potsdam and Vaihingen Datasets ([#1097](https://github.com/open-mmlab/mmsegmentation/pull/1097), [#1171](https://github.com/open-mmlab/mmsegmentation/pull/1171)) +- Add SegFormer's benchmark on Cityscapes ([#1155](https://github.com/open-mmlab/mmsegmentation/pull/1155)) +- Add auto resume ([#1172](https://github.com/open-mmlab/mmsegmentation/pull/1172)) +- Add Mosaic transform and `MultiImageMixDataset` class in `dataset_wrappers` ([#1093](https://github.com/open-mmlab/mmsegmentation/pull/1093), [#1105](https://github.com/open-mmlab/mmsegmentation/pull/1105)) +- Add log collector ([#1175](https://github.com/open-mmlab/mmsegmentation/pull/1175)) + +**Improvements** + +- New-style CPU training and inference ([#1251](https://github.com/open-mmlab/mmsegmentation/pull/1251)) +- Add UNet benchmark with multiple losses supervision ([#1143](https://github.com/open-mmlab/mmsegmentation/pull/1143)) + +**Bug Fixes** + +- Fix the model statistics in doc for readthedocs ([#1153](https://github.com/open-mmlab/mmsegmentation/pull/1153)) +- Set random seed for `palette` if not given ([#1152](https://github.com/open-mmlab/mmsegmentation/pull/1152)) +- Add `COCOStuffDataset` in `class_names.py` ([#1222](https://github.com/open-mmlab/mmsegmentation/pull/1222)) +- Fix bug in non-distributed multi-gpu training/testing ([#1247](https://github.com/open-mmlab/mmsegmentation/pull/1247)) +- Delete unnecessary lines of STDCHead ([#1231](https://github.com/open-mmlab/mmsegmentation/pull/1231)) + +**Contributors** + +- @jbwang1997 made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1152 +- @BeaverCC made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1206 +- @Echo-minn made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1214 +- @rstrudel made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/955 + +### V0.20.2 (12/15/2021) + +**Bug Fixes** + +- Revise --option to --options to avoid BC-breaking. ([#1140](https://github.com/open-mmlab/mmsegmentation/pull/1140)) + +### V0.20.1 (12/14/2021) + +**Improvements** + +- Change options to cfg-options ([#1129](https://github.com/open-mmlab/mmsegmentation/pull/1129)) + +**Bug Fixes** + +- Fix `` in metafile. 
([#1127](https://github.com/open-mmlab/mmsegmentation/pull/1127)) +- Fix correct `num_classes` of HRNet in `LoveDA` dataset ([#1136](https://github.com/open-mmlab/mmsegmentation/pull/1136)) + +### V0.20 (12/10/2021) + +**Highlights** + +- Support Twins ([#989](https://github.com/open-mmlab/mmsegmentation/pull/989)) +- Support a real-time segmentation model STDC ([#995](https://github.com/open-mmlab/mmsegmentation/pull/995)) +- Support a widely-used segmentation model in lane detection ERFNet ([#960](https://github.com/open-mmlab/mmsegmentation/pull/960)) +- Support A Remote Sensing Land-Cover Dataset LoveDA ([#1028](https://github.com/open-mmlab/mmsegmentation/pull/1028)) +- Support focal loss ([#1024](https://github.com/open-mmlab/mmsegmentation/pull/1024)) + +**New Features** + +- Support Twins ([#989](https://github.com/open-mmlab/mmsegmentation/pull/989)) +- Support a real-time segmentation model STDC ([#995](https://github.com/open-mmlab/mmsegmentation/pull/995)) +- Support a widely-used segmentation model in lane detection ERFNet ([#960](https://github.com/open-mmlab/mmsegmentation/pull/960)) +- Add SETR cityscapes benchmark ([#1087](https://github.com/open-mmlab/mmsegmentation/pull/1087)) +- Add BiSeNetV1 COCO-Stuff 164k benchmark ([#1019](https://github.com/open-mmlab/mmsegmentation/pull/1019)) +- Support focal loss ([#1024](https://github.com/open-mmlab/mmsegmentation/pull/1024)) +- Add Cutout transform ([#1022](https://github.com/open-mmlab/mmsegmentation/pull/1022)) + +**Improvements** + +- Set a random seed when the user does not set a seed ([#1039](https://github.com/open-mmlab/mmsegmentation/pull/1039)) +- Add CircleCI setup ([#1086](https://github.com/open-mmlab/mmsegmentation/pull/1086)) +- Skip CI on ignoring given paths ([#1078](https://github.com/open-mmlab/mmsegmentation/pull/1078)) +- Add abstract and image for every paper ([#1060](https://github.com/open-mmlab/mmsegmentation/pull/1060)) +- Create a symbolic link on windows ([#1090](https://github.com/open-mmlab/mmsegmentation/pull/1090)) +- Support video demo using trained model ([#1014](https://github.com/open-mmlab/mmsegmentation/pull/1014)) + +**Bug Fixes** + +- Fix incorrectly loading init_cfg or pretrained models of several transformer models ([#999](https://github.com/open-mmlab/mmsegmentation/pull/999), [#1069](https://github.com/open-mmlab/mmsegmentation/pull/1069), [#1102](https://github.com/open-mmlab/mmsegmentation/pull/1102)) +- Fix EfficientMultiheadAttention in SegFormer ([#1037](https://github.com/open-mmlab/mmsegmentation/pull/1037)) +- Remove `fp16` folder in `configs` ([#1031](https://github.com/open-mmlab/mmsegmentation/pull/1031)) +- Fix several typos in .yml file (Dice Metric [#1041](https://github.com/open-mmlab/mmsegmentation/pull/1041), ADE20K dataset [#1120](https://github.com/open-mmlab/mmsegmentation/pull/1120), Training Memory (GB) [#1083](https://github.com/open-mmlab/mmsegmentation/pull/1083)) +- Fix test error when using `--show-dir` ([#1091](https://github.com/open-mmlab/mmsegmentation/pull/1091)) +- Fix dist training infinite waiting issue ([#1035](https://github.com/open-mmlab/mmsegmentation/pull/1035)) +- Change the upper version of mmcv to 1.5.0 ([#1096](https://github.com/open-mmlab/mmsegmentation/pull/1096)) +- Fix symlink failure on Windows ([#1038](https://github.com/open-mmlab/mmsegmentation/pull/1038)) +- Cancel previous runs that are not completed ([#1118](https://github.com/open-mmlab/mmsegmentation/pull/1118)) +- Unified links of readthedocs in docs 
([#1119](https://github.com/open-mmlab/mmsegmentation/pull/1119)) + +**Contributors** + +- @Junjue-Wang made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1028 +- @ddebby made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1066 +- @del-zhenwu made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1078 +- @KangBK0120 made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1106 +- @zergzzlun made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1091 +- @fingertap made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1035 +- @irvingzhang0512 made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1014 +- @littleSunlxy made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/989 +- @lkm2835 +- @RockeyCoss +- @MengzhangLI +- @Junjun2016 +- @xiexinch +- @xvjiarui + +### V0.19 (11/02/2021) + +**Highlights** + +- Support TIMMBackbone wrapper ([#998](https://github.com/open-mmlab/mmsegmentation/pull/998)) +- Support custom hook ([#428](https://github.com/open-mmlab/mmsegmentation/pull/428)) +- Add codespell pre-commit hook ([#920](https://github.com/open-mmlab/mmsegmentation/pull/920)) +- Add FastFCN benchmark on ADE20K ([#972](https://github.com/open-mmlab/mmsegmentation/pull/972)) + +**New Features** + +- Support TIMMBackbone wrapper ([#998](https://github.com/open-mmlab/mmsegmentation/pull/998)) +- Support custom hook ([#428](https://github.com/open-mmlab/mmsegmentation/pull/428)) +- Add FastFCN benchmark on ADE20K ([#972](https://github.com/open-mmlab/mmsegmentation/pull/972)) +- Add codespell pre-commit hook and fix typos ([#920](https://github.com/open-mmlab/mmsegmentation/pull/920)) + +**Improvements** + +- Make inputs & channels smaller in unittests ([#1004](https://github.com/open-mmlab/mmsegmentation/pull/1004)) +- Change `self.loss_decode` back to `dict` in the single-loss situation ([#1002](https://github.com/open-mmlab/mmsegmentation/pull/1002)) + +**Bug Fixes** + +- Fix typo in usage example ([#1003](https://github.com/open-mmlab/mmsegmentation/pull/1003)) +- Add contiguous after permutation in ViT ([#992](https://github.com/open-mmlab/mmsegmentation/pull/992)) +- Fix the invalid link ([#985](https://github.com/open-mmlab/mmsegmentation/pull/985)) +- Fix bug in CI with Python 3.9 ([#994](https://github.com/open-mmlab/mmsegmentation/pull/994)) +- Fix bug when loading class name from file in custom dataset ([#923](https://github.com/open-mmlab/mmsegmentation/pull/923)) + +**Contributors** + +- @ShoupingShan made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/923 +- @RockeyCoss made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/954 +- @HarborYuan made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/992 +- @lkm2835 made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/1003 +- @gszh made their first contribution in https://github.com/open-mmlab/mmsegmentation/pull/428 +- @VVsssssk +- @MengzhangLI +- @Junjun2016 + +### V0.18 (10/07/2021) + +**Highlights** + +- Support three real-time segmentation models (ICNet [#884](https://github.com/open-mmlab/mmsegmentation/pull/884), BiSeNetV1 [#851](https://github.com/open-mmlab/mmsegmentation/pull/851), and BiSeNetV2 [#804](https://github.com/open-mmlab/mmsegmentation/pull/804)) +- Support one efficient 
segmentation model (FastFCN [#885](https://github.com/open-mmlab/mmsegmentation/pull/885)) +- Support one efficient non-local/self-attention based segmentation model (ISANet [#70](https://github.com/open-mmlab/mmsegmentation/pull/70)) +- Support COCO-Stuff 10k and 164k datasets ([#625](https://github.com/open-mmlab/mmsegmentation/pull/625)) +- Support evaluating concatenated datasets separately ([#833](https://github.com/open-mmlab/mmsegmentation/pull/833)) +- Support loading GT for evaluation from multi-file backend ([#867](https://github.com/open-mmlab/mmsegmentation/pull/867)) + +**New Features** + +- Support three real-time segmentation models (ICNet [#884](https://github.com/open-mmlab/mmsegmentation/pull/884), BiSeNetV1 [#851](https://github.com/open-mmlab/mmsegmentation/pull/851), and BiSeNetV2 [#804](https://github.com/open-mmlab/mmsegmentation/pull/804)) +- Support one efficient segmentation model (FastFCN [#885](https://github.com/open-mmlab/mmsegmentation/pull/885)) +- Support one efficient non-local/self-attention based segmentation model (ISANet [#70](https://github.com/open-mmlab/mmsegmentation/pull/70)) +- Support COCO-Stuff 10k and 164k datasets ([#625](https://github.com/open-mmlab/mmsegmentation/pull/625)) +- Support evaluating concatenated datasets separately ([#833](https://github.com/open-mmlab/mmsegmentation/pull/833)) + +**Improvements** + +- Support loading GT for evaluation from multi-file backend ([#867](https://github.com/open-mmlab/mmsegmentation/pull/867)) +- Auto-convert SyncBN to BN when training on DP automatically ([772](https://github.com/open-mmlab/mmsegmentation/pull/772)) +- Refactor Swin-Transformer ([#800](https://github.com/open-mmlab/mmsegmentation/pull/800)) + +**Bug Fixes** + +- Update mmcv installation in Dockerfile ([#860](https://github.com/open-mmlab/mmsegmentation/pull/860)) +- Fix iteration-number bug when resuming a checkpoint in distributed training ([#866](https://github.com/open-mmlab/mmsegmentation/pull/866)) +- Fix parsing in val_step ([#906](https://github.com/open-mmlab/mmsegmentation/pull/906)) + +### V0.17 (09/01/2021) + +**Highlights** + +- Support SegFormer +- Support DPT +- Support Dark Zurich and Nighttime Driving datasets +- Support progressive evaluation + +**New Features** + +- Support SegFormer ([#599](https://github.com/open-mmlab/mmsegmentation/pull/599)) +- Support DPT ([#605](https://github.com/open-mmlab/mmsegmentation/pull/605)) +- Support Dark Zurich and Nighttime Driving datasets ([#815](https://github.com/open-mmlab/mmsegmentation/pull/815)) +- Support progressive evaluation ([#709](https://github.com/open-mmlab/mmsegmentation/pull/709)) + +**Improvements** + +- Add multiscale_output interface and unittests for HRNet ([#830](https://github.com/open-mmlab/mmsegmentation/pull/830)) +- Support inheriting the cityscapes dataset ([#750](https://github.com/open-mmlab/mmsegmentation/pull/750)) +- Fix some typos in README.md ([#824](https://github.com/open-mmlab/mmsegmentation/pull/824)) +- Delete convert function and add instruction to ViT/Swin README.md ([#791](https://github.com/open-mmlab/mmsegmentation/pull/791)) +- Add vit/swin/mit convert weight scripts ([#783](https://github.com/open-mmlab/mmsegmentation/pull/783)) +- Add copyright files ([#796](https://github.com/open-mmlab/mmsegmentation/pull/796)) + +**Bug Fixes** + +- Fix invalid checkpoint link in inference_demo.ipynb ([#814](https://github.com/open-mmlab/mmsegmentation/pull/814)) +- Ensure that items in dataset have the same order across multiple machines 
([#780](https://github.com/open-mmlab/mmsegmentation/pull/780)) +- Fix the log error ([#766](https://github.com/open-mmlab/mmsegmentation/pull/766)) + +### V0.16 (08/04/2021) + +**Highlights** + +- Support PyTorch 1.9 +- Support SegFormer backbone MiT +- Support md2yml pre-commit hook +- Support frozen stage for HRNet + +**New Features** + +- Support SegFormer backbone MiT ([#594](https://github.com/open-mmlab/mmsegmentation/pull/594)) +- Support md2yml pre-commit hook ([#732](https://github.com/open-mmlab/mmsegmentation/pull/732)) +- Support mim ([#717](https://github.com/open-mmlab/mmsegmentation/pull/717)) +- Add mmseg2torchserve tool ([#552](https://github.com/open-mmlab/mmsegmentation/pull/552)) + +**Improvements** + +- Support HRNet frozen stage ([#743](https://github.com/open-mmlab/mmsegmentation/pull/743)) +- Add template of reimplementation questions ([#741](https://github.com/open-mmlab/mmsegmentation/pull/741)) +- Output pdf and epub formats for readthedocs ([#742](https://github.com/open-mmlab/mmsegmentation/pull/742)) +- Refine the docstring of ResNet ([#723](https://github.com/open-mmlab/mmsegmentation/pull/723)) +- Replace interpolate with resize ([#731](https://github.com/open-mmlab/mmsegmentation/pull/731)) +- Update resource limit ([#700](https://github.com/open-mmlab/mmsegmentation/pull/700)) +- Update config.md ([#678](https://github.com/open-mmlab/mmsegmentation/pull/678)) + +**Bug Fixes** + +- Fix ATTENTION registry ([#729](https://github.com/open-mmlab/mmsegmentation/pull/729)) +- Fix analyze log script ([#716](https://github.com/open-mmlab/mmsegmentation/pull/716)) +- Fix doc api display ([#725](https://github.com/open-mmlab/mmsegmentation/pull/725)) +- Fix patch_embed and pos_embed mismatch error ([#685](https://github.com/open-mmlab/mmsegmentation/pull/685)) +- Fix efficient test for multi-node ([#707](https://github.com/open-mmlab/mmsegmentation/pull/707)) +- Fix init_cfg in resnet backbone ([#697](https://github.com/open-mmlab/mmsegmentation/pull/697)) +- Fix efficient test bug ([#702](https://github.com/open-mmlab/mmsegmentation/pull/702)) +- Fix url error in config docs ([#680](https://github.com/open-mmlab/mmsegmentation/pull/680)) +- Fix mmcv installation ([#676](https://github.com/open-mmlab/mmsegmentation/pull/676)) +- Fix torch version ([#670](https://github.com/open-mmlab/mmsegmentation/pull/670)) + +**Contributors** + +@sshuair @xiexinch @Junjun2016 @mmeendez8 @xvjiarui @sennnnn @puhsu @BIGWangYuDong @keke1u @daavoo + +### V0.15 (07/04/2021) + +**Highlights** + +- Support ViT, SETR, and Swin-Transformer +- Add Chinese documentation +- Unified parameter initialization + +**Bug Fixes** + +- Fix typo and links ([#608](https://github.com/open-mmlab/mmsegmentation/pull/608)) +- Fix Dockerfile ([#607](https://github.com/open-mmlab/mmsegmentation/pull/607)) +- Fix ViT init ([#609](https://github.com/open-mmlab/mmsegmentation/pull/609)) +- Fix mmcv version compatibility table ([#658](https://github.com/open-mmlab/mmsegmentation/pull/658)) +- Fix model links of DMNet ([#660](https://github.com/open-mmlab/mmsegmentation/pull/660)) + +**New Features** + +- Support loading DeiT weights ([#538](https://github.com/open-mmlab/mmsegmentation/pull/538)) +- Support SETR ([#531](https://github.com/open-mmlab/mmsegmentation/pull/531), [#635](https://github.com/open-mmlab/mmsegmentation/pull/635)) +- Add config and models for ViT backbone with UperHead ([#520](https://github.com/open-mmlab/mmsegmentation/pull/531), [#635](https://github.com/open-mmlab/mmsegmentation/pull/520)) 
+- Support Swin-Transformer ([#511](https://github.com/open-mmlab/mmsegmentation/pull/511)) +- Add higher accuracy FastSCNN ([#606](https://github.com/open-mmlab/mmsegmentation/pull/606)) +- Add Chinese documentation ([#666](https://github.com/open-mmlab/mmsegmentation/pull/666)) + +**Improvements** + +- Unified parameter initialization ([#567](https://github.com/open-mmlab/mmsegmentation/pull/567)) +- Separate CUDA and CPU in github action CI ([#602](https://github.com/open-mmlab/mmsegmentation/pull/602)) +- Support persistent dataloader worker ([#646](https://github.com/open-mmlab/mmsegmentation/pull/646)) +- Update meta file fields ([#661](https://github.com/open-mmlab/mmsegmentation/pull/661), [#664](https://github.com/open-mmlab/mmsegmentation/pull/664)) + +### V0.14 (06/02/2021) + +**Highlights** + +- Support ONNX to TensorRT +- Support MIM + +**Bug Fixes** + +- Fix ONNX to TensorRT verify ([#547](https://github.com/open-mmlab/mmsegmentation/pull/547)) +- Fix save best for EvalHook ([#575](https://github.com/open-mmlab/mmsegmentation/pull/575)) + +**New Features** + +- Support loading DeiT weights ([#538](https://github.com/open-mmlab/mmsegmentation/pull/538)) +- Support ONNX to TensorRT ([#542](https://github.com/open-mmlab/mmsegmentation/pull/542)) +- Support output results for ADE20k ([#544](https://github.com/open-mmlab/mmsegmentation/pull/544)) +- Support MIM ([#549](https://github.com/open-mmlab/mmsegmentation/pull/549)) + +**Improvements** + +- Add option for ViT output shape ([#530](https://github.com/open-mmlab/mmsegmentation/pull/530)) +- Infer batch size using len(result) ([#532](https://github.com/open-mmlab/mmsegmentation/pull/532)) +- Add compatible table between MMSeg and MMCV ([#558](https://github.com/open-mmlab/mmsegmentation/pull/558)) + +### V0.13 (05/05/2021) + +**Highlights** + +- Support Pascal Context Class-59 dataset. +- Support Visual Transformer Backbone. +- Support mFscore metric. 
+ +**Bug Fixes** + +- Fixed Colaboratory tutorial ([#451](https://github.com/open-mmlab/mmsegmentation/pull/451)) +- Fixed mIoU calculation range ([#471](https://github.com/open-mmlab/mmsegmentation/pull/471)) +- Fixed sem_fpn, unet README.md ([#492](https://github.com/open-mmlab/mmsegmentation/pull/492)) +- Fixed `num_classes` in FCN for Pascal Context 60-class dataset ([#488](https://github.com/open-mmlab/mmsegmentation/pull/488)) +- Fixed FP16 inference ([#497](https://github.com/open-mmlab/mmsegmentation/pull/497)) + +**New Features** + +- Support dynamic export and visualize to pytorch2onnx ([#463](https://github.com/open-mmlab/mmsegmentation/pull/463)) +- Support export to torchscript ([#469](https://github.com/open-mmlab/mmsegmentation/pull/469), [#499](https://github.com/open-mmlab/mmsegmentation/pull/499)) +- Support Pascal Context Class-59 dataset ([#459](https://github.com/open-mmlab/mmsegmentation/pull/459)) +- Support Visual Transformer backbone ([#465](https://github.com/open-mmlab/mmsegmentation/pull/465)) +- Support UpSample Neck ([#512](https://github.com/open-mmlab/mmsegmentation/pull/512)) +- Support mFscore metric ([#509](https://github.com/open-mmlab/mmsegmentation/pull/509)) + +**Improvements** + +- Add more CI for PyTorch ([#460](https://github.com/open-mmlab/mmsegmentation/pull/460)) +- Add print model graph args for tools/print_config.py ([#451](https://github.com/open-mmlab/mmsegmentation/pull/451)) +- Add cfg links in modelzoo README.md ([#468](https://github.com/open-mmlab/mmsegmentation/pull/469)) +- Add BaseSegmentor import to segmentors/__init__.py ([#495](https://github.com/open-mmlab/mmsegmentation/pull/495)) +- Add MMOCR, MMGeneration links ([#501](https://github.com/open-mmlab/mmsegmentation/pull/501), [#506](https://github.com/open-mmlab/mmsegmentation/pull/506)) +- Add Chinese QR code ([#506](https://github.com/open-mmlab/mmsegmentation/pull/506)) +- Use MMCV MODEL_REGISTRY ([#515](https://github.com/open-mmlab/mmsegmentation/pull/515)) +- Add ONNX testing tools ([#498](https://github.com/open-mmlab/mmsegmentation/pull/498)) +- Replace data_dict calling 'img' key to support MMDet3D ([#514](https://github.com/open-mmlab/mmsegmentation/pull/514)) +- Support reading class_weight from file in loss function ([#513](https://github.com/open-mmlab/mmsegmentation/pull/513)) +- Make tags as comment ([#505](https://github.com/open-mmlab/mmsegmentation/pull/505)) +- Use MMCV EvalHook ([#438](https://github.com/open-mmlab/mmsegmentation/pull/438)) + +### V0.12 (04/03/2021) + +**Highlights** + +- Support FCN-Dilate 6 model. +- Support Dice Loss. 
+ +**Bug Fixes** + +- Fixed PhotoMetricDistortion doc ([#388](https://github.com/open-mmlab/mmsegmentation/pull/388)) +- Fixed install scripts ([#399](https://github.com/open-mmlab/mmsegmentation/pull/399)) +- Fixed Dice Loss multi-class ([#417](https://github.com/open-mmlab/mmsegmentation/pull/417)) + +**New Features** + +- Support Dice Loss ([#396](https://github.com/open-mmlab/mmsegmentation/pull/396)) +- Add plot logs tool ([#426](https://github.com/open-mmlab/mmsegmentation/pull/426)) +- Add opacity option to show_result ([#425](https://github.com/open-mmlab/mmsegmentation/pull/425)) +- Speed up mIoU metric ([#430](https://github.com/open-mmlab/mmsegmentation/pull/430)) + +**Improvements** + +- Refactor unittest file structure ([#440](https://github.com/open-mmlab/mmsegmentation/pull/440)) +- Fix typos in the repo ([#449](https://github.com/open-mmlab/mmsegmentation/pull/449)) +- Include class-level metrics in the log ([#445](https://github.com/open-mmlab/mmsegmentation/pull/445)) + +### V0.11 (02/02/2021) + +**Highlights** + +- Support memory-efficient test; add more UNet models. + +**Bug Fixes** + +- Fixed TTA resize scale ([#334](https://github.com/open-mmlab/mmsegmentation/pull/334)) +- Fixed CI for pip 20.3 ([#307](https://github.com/open-mmlab/mmsegmentation/pull/307)) +- Fixed ADE20k test ([#359](https://github.com/open-mmlab/mmsegmentation/pull/359)) + +**New Features** + +- Support memory efficient test ([#330](https://github.com/open-mmlab/mmsegmentation/pull/330)) +- Add more UNet benchmarks ([#324](https://github.com/open-mmlab/mmsegmentation/pull/324)) +- Support Lovasz Loss ([#351](https://github.com/open-mmlab/mmsegmentation/pull/351)) + +**Improvements** + +- Move train_cfg/test_cfg inside model ([#341](https://github.com/open-mmlab/mmsegmentation/pull/341)) + +### V0.10 (01/01/2021) + +**Highlights** + +- Support MobileNetV3, DMNet, APCNet. Add models of ResNet18V1b, ResNet18V1c, ResNet50V1b. + +**Bug Fixes** + +- Fixed CPU TTA ([#276](https://github.com/open-mmlab/mmsegmentation/pull/276)) +- Fixed CI for pip 20.3 ([#307](https://github.com/open-mmlab/mmsegmentation/pull/307)) + +**New Features** + +- Add ResNet18V1b, ResNet18V1c, ResNet50V1b, ResNet101V1b models ([#316](https://github.com/open-mmlab/mmsegmentation/pull/316)) +- Support MobileNetV3 ([#268](https://github.com/open-mmlab/mmsegmentation/pull/268)) +- Add 4 retinal vessel segmentation benchmarks ([#315](https://github.com/open-mmlab/mmsegmentation/pull/315)) +- Support DMNet ([#313](https://github.com/open-mmlab/mmsegmentation/pull/313)) +- Support APCNet ([#299](https://github.com/open-mmlab/mmsegmentation/pull/299)) + +**Improvements** + +- Refactor Documentation page ([#311](https://github.com/open-mmlab/mmsegmentation/pull/311)) +- Support resize data augmentation according to original image size ([#291](https://github.com/open-mmlab/mmsegmentation/pull/291)) + +### V0.9 (30/11/2020) + +**Highlights** + +- Support 4 medical datasets, UNet and CGNet. 
+ +**New Features** + +- Support RandomRotate transform ([#215](https://github.com/open-mmlab/mmsegmentation/pull/215), [#260](https://github.com/open-mmlab/mmsegmentation/pull/260)) +- Support RGB2Gray transform ([#227](https://github.com/open-mmlab/mmsegmentation/pull/227)) +- Support Rerange transform ([#228](https://github.com/open-mmlab/mmsegmentation/pull/228)) +- Support ignore_index for BCE loss ([#210](https://github.com/open-mmlab/mmsegmentation/pull/210)) +- Add modelzoo statistics ([#263](https://github.com/open-mmlab/mmsegmentation/pull/263)) +- Support Dice evaluation metric ([#225](https://github.com/open-mmlab/mmsegmentation/pull/225)) +- Support Adjust Gamma transform ([#232](https://github.com/open-mmlab/mmsegmentation/pull/232)) +- Support CLAHE transform ([#229](https://github.com/open-mmlab/mmsegmentation/pull/229)) + +**Bug Fixes** + +- Fixed detail API link ([#267](https://github.com/open-mmlab/mmsegmentation/pull/267)) + +### V0.8 (03/11/2020) + +**Highlights** + +- Support 4 medical datasets, UNet and CGNet. + +**New Features** + +- Support customizing runner ([#118](https://github.com/open-mmlab/mmsegmentation/pull/118)) +- Support UNet ([#161](https://github.com/open-mmlab/mmsegmentation/pull/162)) +- Support CHASE_DB1, DRIVE, STARE, HRF ([#203](https://github.com/open-mmlab/mmsegmentation/pull/203)) +- Support CGNet ([#223](https://github.com/open-mmlab/mmsegmentation/pull/223)) + +### V0.7 (07/10/2020) + +**Highlights** + +- Support Pascal Context dataset and customizing dataset classes. + +**Bug Fixes** + +- Fixed CPU inference ([#153](https://github.com/open-mmlab/mmsegmentation/pull/153)) + +**New Features** + +- Add DeepLab OS16 models ([#154](https://github.com/open-mmlab/mmsegmentation/pull/154)) +- Support Pascal Context dataset ([#133](https://github.com/open-mmlab/mmsegmentation/pull/133)) +- Support customizing dataset classes ([#71](https://github.com/open-mmlab/mmsegmentation/pull/71)) +- Support customizing dataset palette ([#157](https://github.com/open-mmlab/mmsegmentation/pull/157)) + +**Improvements** + +- Support 4D tensor output in ONNX ([#150](https://github.com/open-mmlab/mmsegmentation/pull/150)) +- Remove redundancies in ONNX export ([#160](https://github.com/open-mmlab/mmsegmentation/pull/160)) +- Migrate to MMCV DepthwiseSeparableConv ([#158](https://github.com/open-mmlab/mmsegmentation/pull/158)) +- Migrate to MMCV collect_env ([#137](https://github.com/open-mmlab/mmsegmentation/pull/137)) +- Use img_prefix and seg_prefix for loading ([#153](https://github.com/open-mmlab/mmsegmentation/pull/153)) + +### V0.6 (10/09/2020) + +**Highlights** + +- Support new methods, i.e. MobileNetV2, EMANet, DNL, PointRend, Semantic FPN, Fast-SCNN and ResNeSt. 
+ +**Bug Fixes** + +- Fixed sliding inference ONNX export ([#90](https://github.com/open-mmlab/mmsegmentation/pull/90)) + +**New Features** + +- Support MobileNet v2 ([#86](https://github.com/open-mmlab/mmsegmentation/pull/86)) +- Support EMANet ([#34](https://github.com/open-mmlab/mmsegmentation/pull/34)) +- Support DNL ([#37](https://github.com/open-mmlab/mmsegmentation/pull/37)) +- Support PointRend ([#109](https://github.com/open-mmlab/mmsegmentation/pull/109)) +- Support Semantic FPN ([#94](https://github.com/open-mmlab/mmsegmentation/pull/94)) +- Support Fast-SCNN ([#58](https://github.com/open-mmlab/mmsegmentation/pull/58)) +- Support ResNeSt backbone ([#47](https://github.com/open-mmlab/mmsegmentation/pull/47)) +- Support ONNX export (experimental) ([#12](https://github.com/open-mmlab/mmsegmentation/pull/12)) + +**Improvements** + +- Support Upsample in ONNX ([#100](https://github.com/open-mmlab/mmsegmentation/pull/100)) +- Support Windows install (experimental) ([#75](https://github.com/open-mmlab/mmsegmentation/pull/75)) +- Add more OCRNet results ([#20](https://github.com/open-mmlab/mmsegmentation/pull/20)) +- Add PyTorch 1.6 CI ([#64](https://github.com/open-mmlab/mmsegmentation/pull/64)) +- Get version and githash automatically ([#55](https://github.com/open-mmlab/mmsegmentation/pull/55)) + +### v0.5.1 (11/08/2020) + +**Highlights** + +- Support FP16 and more generalized OHEM + +**Bug Fixes** + +- Fixed Pascal VOC conversion script (#19) +- Fixed OHEM weight assign bug (#54) +- Fixed palette type when palette is not given (#27) + +**New Features** + +- Support FP16 (#21) +- Generalized OHEM (#54) + +**Improvements** + +- Add load-from flag (#33) +- Fixed training tricks doc about different learning rates of model (#26) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/conf.py b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/conf.py new file mode 100644 index 0000000..cd2113d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/conf.py @@ -0,0 +1,134 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +import os +import subprocess +import sys + +import pytorch_sphinx_theme + +sys.path.insert(0, os.path.abspath('../../')) + +# -- Project information ----------------------------------------------------- + +project = 'MMSegmentation' +copyright = '2020-2021, OpenMMLab' +author = 'MMSegmentation Authors' +version_file = '../../mmseg/version.py' + + +def get_version(): + with open(version_file, 'r') as f: + exec(compile(f.read(), version_file, 'exec')) + return locals()['__version__'] + + +# The full version, including alpha/beta/rc tags +release = get_version() + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. 
+extensions = [ + 'sphinx.ext.autodoc', 'sphinx.ext.napoleon', 'sphinx.ext.viewcode', + 'sphinx_markdown_tables', 'sphinx_copybutton', 'myst_parser' +] + +autodoc_mock_imports = [ + 'matplotlib', 'pycocotools', 'mmseg.version', 'mmcv.ops' +] + +# Ignore >>> when copying code +copybutton_prompt_text = r'>>> |\.\.\. ' +copybutton_prompt_is_regexp = True + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffixes as a list of strings: +# +source_suffix = { + '.rst': 'restructuredtext', + '.md': 'markdown', +} + +# The master toctree document. +master_doc = 'index' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +# html_theme = 'sphinx_rtd_theme' +html_theme = 'pytorch_sphinx_theme' +html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()] +html_theme_options = { + 'logo_url': + 'https://mmsegmentation.readthedocs.io/en/latest/', + 'menu': [ + { + 'name': + 'Tutorial', + 'url': + 'https://github.com/open-mmlab/mmsegmentation/blob/master/' + 'demo/MMSegmentation_Tutorial.ipynb' + }, + { + 'name': 'GitHub', + 'url': 'https://github.com/open-mmlab/mmsegmentation' + }, + { + 'name': + 'Upstream', + 'children': [ + { + 'name': 'MMCV', + 'url': 'https://github.com/open-mmlab/mmcv', + 'description': 'Foundational library for computer vision' + }, + ] + }, + ], + # Specify the language of shared menu + 'menu_lang': + 'en' +} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] +html_css_files = ['css/readthedocs.css'] + +# Enable ::: for myst +myst_enable_extensions = ['colon_fence'] +myst_heading_anchors = 3 + +language = 'en' + + +def builder_inited_handler(app): + subprocess.run(['./stat.py']) + + +def setup(app): + app.connect('builder-inited', builder_inited_handler) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/dataset_prepare.md b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/dataset_prepare.md new file mode 100644 index 0000000..4982ce1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/dataset_prepare.md @@ -0,0 +1,378 @@ +## Prepare datasets + +It is recommended to symlink the dataset root to `$MMSEGMENTATION/data`. +If your folder structure is different, you may need to change the corresponding paths in config files. 
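+ +For example, a minimal sketch of such symlinks (the `/data/...` source paths are hypothetical; point them at wherever your datasets actually live): + +```shell +# Hypothetical dataset locations; adjust to your environment. +mkdir -p $MMSEGMENTATION/data +ln -s /data/cityscapes $MMSEGMENTATION/data/cityscapes +ln -s /data/ade $MMSEGMENTATION/data/ade +```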
+ +```none +mmsegmentation +├── mmseg +├── tools +├── configs +├── data +│ ├── cityscapes +│ │ ├── leftImg8bit +│ │ │ ├── train +│ │ │ ├── val +│ │ ├── gtFine +│ │ │ ├── train +│ │ │ ├── val +│ ├── VOCdevkit +│ │ ├── VOC2012 +│ │ │ ├── JPEGImages +│ │ │ ├── SegmentationClass +│ │ │ ├── ImageSets +│ │ │ │ ├── Segmentation +│ │ ├── VOC2010 +│ │ │ ├── JPEGImages +│ │ │ ├── SegmentationClassContext +│ │ │ ├── ImageSets +│ │ │ │ ├── SegmentationContext +│ │ │ │ │ ├── train.txt +│ │ │ │ │ ├── val.txt +│ │ │ ├── trainval_merged.json +│ │ ├── VOCaug +│ │ │ ├── dataset +│ │ │ │ ├── cls +│ ├── ade +│ │ ├── ADEChallengeData2016 +│ │ │ ├── annotations +│ │ │ │ ├── training +│ │ │ │ ├── validation +│ │ │ ├── images +│ │ │ │ ├── training +│ │ │ │ ├── validation +│ ├── coco_stuff10k +│ │ ├── images +│ │ │ ├── train2014 +│ │ │ ├── test2014 +│ │ ├── annotations +│ │ │ ├── train2014 +│ │ │ ├── test2014 +│ │ ├── imagesLists +│ │ │ ├── train.txt +│ │ │ ├── test.txt +│ │ │ ├── all.txt +│ ├── coco_stuff164k +│ │ ├── images +│ │ │ ├── train2017 +│ │ │ ├── val2017 +│ │ ├── annotations +│ │ │ ├── train2017 +│ │ │ ├── val2017 +│ ├── CHASE_DB1 +│ │ ├── images +│ │ │ ├── training +│ │ │ ├── validation +│ │ ├── annotations +│ │ │ ├── training +│ │ │ ├── validation +│ ├── DRIVE +│ │ ├── images +│ │ │ ├── training +│ │ │ ├── validation +│ │ ├── annotations +│ │ │ ├── training +│ │ │ ├── validation +│ ├── HRF +│ │ ├── images +│ │ │ ├── training +│ │ │ ├── validation +│ │ ├── annotations +│ │ │ ├── training +│ │ │ ├── validation +│ ├── STARE +│ │ ├── images +│ │ │ ├── training +│ │ │ ├── validation +│ │ ├── annotations +│ │ │ ├── training +│ │ │ ├── validation +| ├── dark_zurich +| │   ├── gps +| │   │   ├── val +| │   │   └── val_ref +| │   ├── gt +| │   │   └── val +| │   ├── LICENSE.txt +| │   ├── lists_file_names +| │   │   ├── val_filenames.txt +| │   │   └── val_ref_filenames.txt +| │   ├── README.md +| │   └── rgb_anon +| │   | ├── val +| │   | └── val_ref +| ├── NighttimeDrivingTest +| | ├── gtCoarse_daytime_trainvaltest +| | │   └── test +| | │   └── night +| | └── leftImg8bit +| | | └── test +| | | └── night +│ ├── loveDA +│ │ ├── img_dir +│ │ │ ├── train +│ │ │ ├── val +│ │ │ ├── test +│ │ ├── ann_dir +│ │ │ ├── train +│ │ │ ├── val +│ ├── potsdam +│ │ ├── img_dir +│ │ │ ├── train +│ │ │ ├── val +│ │ ├── ann_dir +│ │ │ ├── train +│ │ │ ├── val +│ ├── vaihingen +│ │ ├── img_dir +│ │ │ ├── train +│ │ │ ├── val +│ │ ├── ann_dir +│ │ │ ├── train +│ │ │ ├── val +│ ├── iSAID +│ │ ├── img_dir +│ │ │ ├── train +│ │ │ ├── val +│ │ │ ├── test +│ │ ├── ann_dir +│ │ │ ├── train +│ │ │ ├── val +``` + +### Cityscapes + +The data could be found [here](https://www.cityscapes-dataset.com/downloads/) after registration. + +By convention, `**labelTrainIds.png` are used for cityscapes training. +We provided a [scripts](https://github.com/open-mmlab/mmsegmentation/blob/master/tools/convert_datasets/cityscapes.py) based on [cityscapesscripts](https://github.com/mcordts/cityscapesScripts) +to generate `**labelTrainIds.png`. + +```shell +# --nproc means 8 process for conversion, which could be omitted as well. +python tools/convert_datasets/cityscapes.py data/cityscapes --nproc 8 +``` + +### Pascal VOC + +Pascal VOC 2012 could be downloaded from [here](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar). 
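+
+For example (the URL is the one linked above):
+
+```shell
+wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
+tar -xf VOCtrainval_11-May-2012.tar
+```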
+
+Besides, most recent works on the Pascal VOC dataset usually exploit extra augmentation data, which can be found [here](http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz).
+
+If you would like to use the augmented VOC dataset, please run the following command to convert the augmentation annotations into the proper format.
+
+```shell
+# --nproc means using 8 processes for conversion; it may be omitted.
+python tools/convert_datasets/voc_aug.py data/VOCdevkit data/VOCdevkit/VOCaug --nproc 8
+```
+
+Please refer to [concat dataset](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/tutorials/customize_datasets.md#concatenate-dataset) for details about how to concatenate them and train them together.
+
+### ADE20K
+
+The training and validation set of ADE20K can be downloaded from this [link](http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip).
+You may also download the test set from [here](http://data.csail.mit.edu/places/ADEchallenge/release_test.zip).
+
+### Pascal Context
+
+The training and validation set of Pascal Context can be downloaded from [here](http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar). You may also download the test set from [here](http://host.robots.ox.ac.uk:8080/eval/downloads/VOC2010test.tar) after registration.
+
+To split the training and validation set from the original dataset, you may download trainval_merged.json from [here](https://codalabuser.blob.core.windows.net/public/trainval_merged.json).
+
+If you would like to use the Pascal Context dataset, please install [Detail](https://github.com/zhanghang1989/detail-api) and then run the following command to convert the annotations into the proper format.
+
+```shell
+python tools/convert_datasets/pascal_context.py data/VOCdevkit data/VOCdevkit/VOC2010/trainval_merged.json
+```
+
+### COCO Stuff 10k
+
+The data can be downloaded [here](http://calvin.inf.ed.ac.uk/wp-content/uploads/data/cocostuffdataset/cocostuff-10k-v1.1.zip) with wget.
+
+For the COCO Stuff 10k dataset, please run the following commands to download and convert the dataset.
+
+```shell
+# download
+mkdir coco_stuff10k && cd coco_stuff10k
+wget http://calvin.inf.ed.ac.uk/wp-content/uploads/data/cocostuffdataset/cocostuff-10k-v1.1.zip
+
+# unzip
+unzip cocostuff-10k-v1.1.zip
+
+# --nproc means using 8 processes for conversion; it may be omitted.
+python tools/convert_datasets/coco_stuff10k.py /path/to/coco_stuff10k --nproc 8
+```
+
+By convention, mask labels in `/path/to/coco_stuff10k/annotations/*2014/*_labelTrainIds.png` are used for COCO Stuff 10k training and testing.
+
+### COCO Stuff 164k
+
+For the COCO Stuff 164k dataset, please run the following commands to download and convert the augmented dataset.
+
+```shell
+# download
+mkdir coco_stuff164k && cd coco_stuff164k
+wget http://images.cocodataset.org/zips/train2017.zip
+wget http://images.cocodataset.org/zips/val2017.zip
+wget http://calvin.inf.ed.ac.uk/wp-content/uploads/data/cocostuffdataset/stuffthingmaps_trainval2017.zip
+
+# unzip
+unzip train2017.zip -d images/
+unzip val2017.zip -d images/
+unzip stuffthingmaps_trainval2017.zip -d annotations/
+
+# --nproc means using 8 processes for conversion; it may be omitted.
+python tools/convert_datasets/coco_stuff164k.py /path/to/coco_stuff164k --nproc 8
+```
+
+By convention, mask labels in `/path/to/coco_stuff164k/annotations/*2017/*_labelTrainIds.png` are used for COCO Stuff 164k training and testing.
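+
+For a quick sanity check of the converted masks, you can load one of the generated `*_labelTrainIds.png` files and list the label ids it contains. A minimal sketch using Pillow and NumPy (the file name below is only a placeholder):
+
+```python
+import numpy as np
+from PIL import Image
+
+# Path to one converted mask; replace with a real file from your dataset.
+mask = np.array(Image.open(
+    'annotations/train2017/000000000009_labelTrainIds.png'))
+
+# Single-channel label map: each pixel stores a class id (255 = ignore).
+print(mask.shape, mask.dtype)
+print('label ids present:', np.unique(mask))
+```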
+
+The details of this dataset can be found [here](https://github.com/nightrome/cocostuff#downloads).
+
+### CHASE DB1
+
+The training and validation set of CHASE DB1 can be downloaded from [here](https://staffnet.kingston.ac.uk/~ku15565/CHASE_DB1/assets/CHASEDB1.zip).
+
+To convert the CHASE DB1 dataset to MMSegmentation format, run the following command:
+
+```shell
+python tools/convert_datasets/chase_db1.py /path/to/CHASEDB1.zip
+```
+
+The script will create the directory structure automatically.
+
+### DRIVE
+
+The training and validation set of DRIVE can be downloaded from [here](https://drive.grand-challenge.org/). Before that, you should register an account. Currently '1st_manual' is not provided officially.
+
+To convert the DRIVE dataset to MMSegmentation format, run the following command:
+
+```shell
+python tools/convert_datasets/drive.py /path/to/training.zip /path/to/test.zip
+```
+
+The script will create the directory structure automatically.
+
+### HRF
+
+First, download [healthy.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/healthy.zip), [glaucoma.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/glaucoma.zip), [diabetic_retinopathy.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/diabetic_retinopathy.zip), [healthy_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/healthy_manualsegm.zip), [glaucoma_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/glaucoma_manualsegm.zip) and [diabetic_retinopathy_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/diabetic_retinopathy_manualsegm.zip).
+
+To convert the HRF dataset to MMSegmentation format, run the following command:
+
+```shell
+python tools/convert_datasets/hrf.py /path/to/healthy.zip /path/to/healthy_manualsegm.zip /path/to/glaucoma.zip /path/to/glaucoma_manualsegm.zip /path/to/diabetic_retinopathy.zip /path/to/diabetic_retinopathy_manualsegm.zip
+```
+
+The script will create the directory structure automatically.
+
+### STARE
+
+First, download [stare-images.tar](http://cecas.clemson.edu/~ahoover/stare/probing/stare-images.tar), [labels-ah.tar](http://cecas.clemson.edu/~ahoover/stare/probing/labels-ah.tar) and [labels-vk.tar](http://cecas.clemson.edu/~ahoover/stare/probing/labels-vk.tar).
+
+To convert the STARE dataset to MMSegmentation format, run the following command:
+
+```shell
+python tools/convert_datasets/stare.py /path/to/stare-images.tar /path/to/labels-ah.tar /path/to/labels-vk.tar
+```
+
+The script will create the directory structure automatically.
+
+### Dark Zurich
+
+Since we only support testing models on this dataset, you only need to download [the validation set](https://data.vision.ee.ethz.ch/csakarid/shared/GCMA_UIoU/Dark_Zurich_val_anon.zip).
+
+### Nighttime Driving
+
+Since we only support testing models on this dataset, you only need to download [the test set](http://data.vision.ee.ethz.ch/daid/NighttimeDriving/NighttimeDrivingTest.zip).
+
+### LoveDA
+
+The data can be downloaded from Google Drive [here](https://drive.google.com/drive/folders/1ibYV0qwn4yuuh068Rnc-w4tPi0U0c-ti?usp=sharing).
+
+Alternatively, it can be downloaded from [zenodo](https://zenodo.org/record/5706578#.YZvN7SYRXdF) with the following commands:
+
+```shell
+# Download Train.zip
+wget https://zenodo.org/record/5706578/files/Train.zip
+# Download Val.zip
+wget https://zenodo.org/record/5706578/files/Val.zip
+# Download Test.zip
+wget https://zenodo.org/record/5706578/files/Test.zip
+```
+
+For the LoveDA dataset, please run the following command to re-organize the downloaded files:
+
+```shell
+python tools/convert_datasets/loveda.py /path/to/loveDA
+```
+
+Instructions for using a trained model to predict on the LoveDA test set and submitting the results to the evaluation server can be found [here](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/inference.md).
+
+More details about LoveDA can be found [here](https://github.com/Junjue-Wang/LoveDA).
+
+### ISPRS Potsdam
+
+The [Potsdam](https://www2.isprs.org/commissions/comm2/wg4/benchmark/2d-sem-label-potsdam/)
+dataset is for urban semantic segmentation and was used in the 2D Semantic Labeling Contest - Potsdam.
+
+The dataset can be requested at the challenge [homepage](https://www2.isprs.org/commissions/comm2/wg4/benchmark/data-request-form/).
+The '2_Ortho_RGB.zip' and '5_Labels_all_noBoundary.zip' files are required.
+
+For the Potsdam dataset, please run the following command to re-organize the dataset:
+
+```shell
+python tools/convert_datasets/potsdam.py /path/to/potsdam
+```
+
+In our default setting, it will generate 3456 images for training and 2016 images for validation.
+
+### ISPRS Vaihingen
+
+The [Vaihingen](https://www2.isprs.org/commissions/comm2/wg4/benchmark/2d-sem-label-vaihingen/)
+dataset is for urban semantic segmentation and was used in the 2D Semantic Labeling Contest - Vaihingen.
+
+The dataset can be requested at the challenge [homepage](https://www2.isprs.org/commissions/comm2/wg4/benchmark/data-request-form/).
+The 'ISPRS_semantic_labeling_Vaihingen.zip' and 'ISPRS_semantic_labeling_Vaihingen_ground_truth_eroded_COMPLETE.zip' files are required.
+
+For the Vaihingen dataset, please run the following command to re-organize the dataset:
+
+```shell
+python tools/convert_datasets/vaihingen.py /path/to/vaihingen
+```
+
+In our default setting (`clip_size`=512, `stride_size`=256), it will generate 344 images for training and 398 images for validation.
+
+### iSAID
+
+The data images can be downloaded from [DOTA-v1.0](https://captain-whu.github.io/DOTA/dataset.html) (train/val/test).
+
+The data annotations can be downloaded from [iSAID](https://captain-whu.github.io/iSAID/dataset.html) (train/val).
+
+iSAID is a large-scale dataset for instance segmentation (it also provides semantic segmentation annotations) in aerial images.
+
+After downloading the iSAID dataset, you may need to arrange the files in the following structure before conversion:
+
+```
+│ ├── iSAID
+│ │ ├── train
+│ │ │ ├── images
+│ │ │ │ ├── part1.zip
+│ │ │ │ ├── part2.zip
+│ │ │ │ ├── part3.zip
+│ │ │ ├── Semantic_masks
+│ │ │ │ ├── images.zip
+│ │ ├── val
+│ │ │ ├── images
+│ │ │ │ ├── part1.zip
+│ │ │ ├── Semantic_masks
+│ │ │ │ ├── images.zip
+│ │ ├── test
+│ │ │ ├── images
+│ │ │ │ ├── part1.zip
+│ │ │ │ ├── part2.zip
+```
+
+```shell
+python tools/convert_datasets/isaid.py /path/to/iSAID
+```
+
+In our default setting (`patch_width`=896, `patch_height`=896, `overlap_area`=384), it will generate 33978 images for training and 11644 images for validation.
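+
+After preparing any of the datasets above, it is worth verifying that every image has a matching annotation before training. A small, dataset-agnostic sketch (the directory names follow the layout shown earlier; adjust them to the dataset and split you prepared):
+
+```python
+from pathlib import Path
+
+# Example layout; point these at the split you prepared.
+img_dir = Path('data/iSAID/img_dir/train')
+ann_dir = Path('data/iSAID/ann_dir/train')
+
+imgs = {p.stem for p in img_dir.glob('*.png')}
+anns = {p.stem for p in ann_dir.glob('*.png')}
+
+print(f'{len(imgs)} images, {len(anns)} annotations')
+print('images without annotation:', sorted(imgs - anns)[:5])
+print('annotations without image:', sorted(anns - imgs)[:5])
+```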
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/faq.md b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/faq.md new file mode 100644 index 0000000..ca40ae2 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/faq.md @@ -0,0 +1,64 @@ +# Frequently Asked Questions (FAQ) + +We list some common troubles faced by many users and their corresponding solutions here. Feel free to enrich the list if you find any frequent issues and have ways to help others to solve them. If the contents here do not cover your issue, please create an issue using the [provided templates](https://github.com/open-mmlab/mmsegmentation/blob/master/.github/ISSUE_TEMPLATE/error-report.md/) and make sure you fill in all required information in the template. + +## Installation + +The compatible MMSegmentation and MMCV versions are as below. Please install the correct version of MMCV to avoid installation issues. + +| MMSegmentation version | MMCV version | MMClassification version | +| :--------------------: | :-------------------------: | :----------------------: | +| master | mmcv-full>=1.5.0, \<=1.6.0 | mmcls>=0.20.1, \<=1.0.0 | +| 0.25.0 | mmcv-full>=1.5.0, \<=1.6.0 | mmcls>=0.20.1, \<=1.0.0 | +| 0.24.1 | mmcv-full>=1.4.4, \<=1.6.0 | mmcls>=0.20.1, \<=1.0.0 | +| 0.23.0 | mmcv-full>=1.4.4, \<=1.6.0 | mmcls>=0.20.1, \<=1.0.0 | +| 0.22.0 | mmcv-full>=1.4.4, \<=1.6.0 | mmcls>=0.20.1, \<=1.0.0 | +| 0.21.1 | mmcv-full>=1.4.4, \<=1.6.0 | Not required | +| 0.20.2 | mmcv-full>=1.3.13, \<=1.6.0 | Not required | +| 0.19.0 | mmcv-full>=1.3.13, \<1.3.17 | Not required | +| 0.18.0 | mmcv-full>=1.3.13, \<1.3.17 | Not required | +| 0.17.0 | mmcv-full>=1.3.7, \<1.3.17 | Not required | +| 0.16.0 | mmcv-full>=1.3.7, \<1.3.17 | Not required | +| 0.15.0 | mmcv-full>=1.3.7, \<1.3.17 | Not required | +| 0.14.1 | mmcv-full>=1.3.7, \<1.3.17 | Not required | +| 0.14.0 | mmcv-full>=1.3.1, \<1.3.2 | Not required | +| 0.13.0 | mmcv-full>=1.3.1, \<1.3.2 | Not required | +| 0.12.0 | mmcv-full>=1.1.4, \<1.3.2 | Not required | +| 0.11.0 | mmcv-full>=1.1.4, \<1.3.0 | Not required | +| 0.10.0 | mmcv-full>=1.1.4, \<1.3.0 | Not required | +| 0.9.0 | mmcv-full>=1.1.4, \<1.3.0 | Not required | +| 0.8.0 | mmcv-full>=1.1.4, \<1.2.0 | Not required | +| 0.7.0 | mmcv-full>=1.1.2, \<1.2.0 | Not required | +| 0.6.0 | mmcv-full>=1.1.2, \<1.2.0 | Not required | + +You need to run `pip uninstall mmcv` first if you have mmcv installed. +If mmcv and mmcv-full are both installed, there will be `ModuleNotFoundError`. + +- "No module named 'mmcv.ops'"; "No module named 'mmcv.\_ext'". + + 1. Uninstall existing mmcv in the environment using `pip uninstall mmcv`. + 2. Install mmcv-full following the [installation instruction](get_started#best-practices). + +## How to know the number of GPUs needed to train the model + +- Infer from the name of the config file of the model. You can refer to the `Config Name Style` part of [Learn about Configs](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/en/tutorials/config.md). For example, for config file with name `segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py`, `8x1` means training the model corresponding to it needs 8 GPUs, and the batch size of each GPU is 1. +- Infer from the log file. Open the log file of the model and search `nGPU` in the file. The number of figures following `nGPU` is the number of GPUs needed to train the model. 
For instance, searching for `nGPU` in the log file yields the record `nGPU 0,1,2,3,4,5,6,7`, which indicates that eight GPUs are needed to train the model.
+
+## What does the auxiliary head mean
+
+Briefly, it is a deep supervision trick to improve the accuracy. In the training phase, `decode_head` decodes the semantic segmentation output, while `auxiliary_head` just adds an auxiliary loss. The segmentation result it produces has no impact on your model's inference result; it only takes effect during training. You may read this [paper](https://arxiv.org/pdf/1612.01105.pdf) for more information.
+
+## Why is the log file not created
+
+In the train script, we call `get_root_logger` at Line 167, and `get_root_logger` in mmseg calls `get_logger` in mmcv. mmcv returns the same logger that has been initialized in 'mmsegmentation/tools/train.py' with the parameter `log_file`, so there is only one logger (initialized with `log_file`) during training.
+Ref: [https://github.com/open-mmlab/mmcv/blob/21bada32560c7ed7b15b017dc763d862789e29a8/mmcv/utils/logging.py#L9-L16](https://github.com/open-mmlab/mmcv/blob/21bada32560c7ed7b15b017dc763d862789e29a8/mmcv/utils/logging.py#L9-L16)
+
+If you find that the log file is not created, check whether `mmcv.utils.get_logger` is called elsewhere.
+
+## How to output the image for painting the segmentation mask when running the test script
+
+In the test script, we provide the `--show-dir` argument to control whether to output the painted images. Users may run the following command:
+
+```shell
+python tools/test.py {config} {checkpoint} --show-dir {/path/to/save/image} --opacity 1
+```
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/get_started.md b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/get_started.md
new file mode 100644
index 0000000..bbe3d57
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/get_started.md
@@ -0,0 +1,200 @@
+# Prerequisites
+
+In this section we demonstrate how to prepare an environment with PyTorch.
+
+MMSegmentation works on Linux, Windows and macOS. It requires Python 3.6+, CUDA 9.2+ and PyTorch 1.3+.
+
+```{note}
+If you are experienced with PyTorch and have already installed it, just skip this part and jump to the [next section](#installation). Otherwise, you can follow these steps for the preparation.
+```
+
+**Step 0.** Download and install Miniconda from the [official website](https://docs.conda.io/en/latest/miniconda.html).
+
+**Step 1.** Create a conda environment and activate it.
+
+```shell
+conda create --name openmmlab python=3.8 -y
+conda activate openmmlab
+```
+
+**Step 2.** Install PyTorch following the [official instructions](https://pytorch.org/get-started/locally/), e.g.
+
+On GPU platforms:
+
+```shell
+conda install pytorch torchvision -c pytorch
+```
+
+On CPU platforms:
+
+```shell
+conda install pytorch torchvision cpuonly -c pytorch
+```
+
+# Installation
+
+We recommend that users follow our best practices to install MMSegmentation. However, the whole process is highly customizable. See the [Customize Installation](#customize-installation) section for more information.
+
+## Best Practices
+
+**Step 0.** Install [MMCV](https://github.com/open-mmlab/mmcv) using [MIM](https://github.com/open-mmlab/mim).
+
+```shell
+pip install -U openmim
+mim install mmcv-full
+```
+
+**Step 1.** Install MMSegmentation.
+
+Case a: If you develop and run mmseg directly, install it from source:
+
+```shell
+git clone https://github.com/open-mmlab/mmsegmentation.git
+cd mmsegmentation
+pip install -v -e .
+# "-v" means verbose, or more output
+# "-e" means installing a project in editable mode,
+# thus any local modifications made to the code will take effect without reinstallation.
+```
+
+Case b: If you use mmsegmentation as a dependency or third-party package, install it with pip:
+
+```shell
+pip install mmsegmentation
+```
+
+## Verify the installation
+
+To verify whether MMSegmentation is installed correctly, we provide some sample code to run an inference demo.
+
+**Step 1.** We need to download config and checkpoint files.
+
+```shell
+mim download mmsegmentation --config pspnet_r50-d8_512x1024_40k_cityscapes --dest .
+```
+
+The download will take several seconds or more, depending on your network environment. When it is done, you will find two files `pspnet_r50-d8_512x1024_40k_cityscapes.py` and `pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth` in your current folder.
+
+**Step 2.** Verify the inference demo.
+
+Option (a). If you installed mmsegmentation from source, just run the following command.
+
+```shell
+python demo/image_demo.py demo/demo.png configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth --device cuda:0 --out-file result.jpg
+```
+
+You will see a new image `result.jpg` in your current folder, where segmentation masks are overlaid on all objects.
+
+Option (b). If you installed mmsegmentation with pip, open your Python interpreter and copy & paste the following code.
+
+```python
+from mmseg.apis import inference_segmentor, init_segmentor
+import mmcv
+
+config_file = 'pspnet_r50-d8_512x1024_40k_cityscapes.py'
+checkpoint_file = 'pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'
+
+# build the model from a config file and a checkpoint file
+model = init_segmentor(config_file, checkpoint_file, device='cuda:0')
+
+# test a single image and show the results
+img = 'test.jpg'  # or img = mmcv.imread(img), which will only load it once
+result = inference_segmentor(model, img)
+# visualize the results in a new window
+model.show_result(img, result, show=True)
+# or save the visualization results to image files
+# you can change the opacity of the painted segmentation map in (0, 1].
+model.show_result(img, result, out_file='result.jpg', opacity=0.5)
+
+# test a video and show the results
+video = mmcv.VideoReader('video.mp4')
+for frame in video:
+   result = inference_segmentor(model, frame)
+   model.show_result(frame, result, wait_time=1)
+```
+
+You can modify the code above to test a single image or a video; either option verifies that the installation was successful.
+
+## Customize Installation
+
+### CUDA versions
+
+When installing PyTorch, you need to specify the version of CUDA. If you are not clear on which to choose, follow our recommendations:
+
+- For Ampere-based NVIDIA GPUs, such as the GeForce 30 series and NVIDIA A100, CUDA 11 is a must.
+- For older NVIDIA GPUs, CUDA 11 is backward compatible, but CUDA 10.2 offers better compatibility and is more lightweight.
+
+Please make sure the GPU driver satisfies the minimum version requirements. See [this table](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html#cuda-major-component-versions__table-cuda-toolkit-driver-versions) for more information.
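+
+As a quick check, you can compare the driver version reported by `nvidia-smi` against the CUDA version your PyTorch build was compiled with (a small sketch; the exact output format varies by setup):
+
+```shell
+# Driver version and the maximum CUDA version it supports
+nvidia-smi
+
+# CUDA version the installed PyTorch was built against
+python -c "import torch; print(torch.version.cuda, torch.cuda.is_available())"
+```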
+
+```{note}
+Installing CUDA runtime libraries is enough if you follow our best practices, because no CUDA code will be compiled locally. However, if you hope to compile MMCV from source or develop other CUDA operators, you need to install the complete CUDA toolkit from NVIDIA's [website](https://developer.nvidia.com/cuda-downloads), and its version should match the CUDA version of PyTorch, i.e., the version of cudatoolkit specified in the `conda install` command.
+```
+
+### Install MMCV without MIM
+
+MMCV contains C++ and CUDA extensions, and thus depends on PyTorch in a complex way. MIM solves such dependencies automatically and makes the installation easier. However, it is not a must.
+
+To install MMCV with pip instead of MIM, please follow the [MMCV installation guides](https://mmcv.readthedocs.io/en/latest/get_started/installation.html). This requires manually specifying a find-url based on the PyTorch version and its CUDA version.
+
+For example, the following command installs mmcv-full built for PyTorch 1.10.x and CUDA 11.3.
+
+```shell
+pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu113/torch1.10/index.html
+```
+
+### Install on CPU-only platforms
+
+MMSegmentation can be built for CPU-only environments. In CPU mode you can train (requires MMCV >= 1.4.4), test, or run inference with a model.
+
+### Install on Google Colab
+
+[Google Colab](https://research.google.com/) usually has PyTorch installed,
+thus we only need to install MMCV and MMSegmentation with the following commands.
+
+**Step 1.** Install [MMCV](https://github.com/open-mmlab/mmcv) using [MIM](https://github.com/open-mmlab/mim).
+
+```shell
+!pip3 install openmim
+!mim install mmcv-full
+```
+
+**Step 2.** Install MMSegmentation from source.
+
+```shell
+!git clone https://github.com/open-mmlab/mmsegmentation.git
+%cd mmsegmentation
+!pip install -e .
+```
+
+**Step 3.** Verification.
+
+```python
+import mmseg
+print(mmseg.__version__)
+# Example output: 0.24.1
+```
+
+```{note}
+Within Jupyter, the exclamation mark `!` is used to call external executables and `%cd` is a [magic command](https://ipython.readthedocs.io/en/stable/interactive/magics.html#magic-cd) to change the current working directory of Python.
+```
+
+### Using MMSegmentation with Docker
+
+We provide a [Dockerfile](https://github.com/open-mmlab/mmsegmentation/blob/master/docker/Dockerfile) to build an image. Ensure that your [docker version](https://docs.docker.com/engine/install/) is >= 19.03.
+
+```shell
+# build an image with PyTorch 1.11, CUDA 11.3
+# If you prefer other versions, just modify the Dockerfile
+docker build -t mmsegmentation docker/
+```
+
+Run it with
+
+```shell
+docker run --gpus all --shm-size=8g -it -v {DATA_DIR}:/mmsegmentation/data mmsegmentation
+```
+
+## Troubleshooting
+
+If you run into issues during the installation, please first check the [FAQ](faq.md) page.
+You may [open an issue](https://github.com/open-mmlab/mmsegmentation/issues/new/choose) on GitHub if no solution is found.
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/index.rst b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/index.rst
new file mode 100644
index 0000000..ae009bf
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/index.rst
@@ -0,0 +1,63 @@
+Welcome to MMSegmentation's documentation!
+==========================================
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Get Started
+
+   get_started.md
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Dataset Preparation
+
+   dataset_prepare.md
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Model Zoo
+
+   model_zoo.md
+   modelzoo_statistics.md
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Quick Run
+
+   train.md
+   inference.md
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Tutorials
+
+   tutorials/index.rst
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Useful Tools and Scripts
+
+   useful_tools.md
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Notes
+
+   changelog.md
+   faq.md
+
+.. toctree::
+   :caption: Switch Language
+
+   switch_language.md
+
+.. toctree::
+   :caption: API Reference
+
+   api.rst
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`search`
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/inference.md b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/inference.md
new file mode 100644
index 0000000..6175e6e
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/inference.md
@@ -0,0 +1,131 @@
+## Inference with pretrained models
+
+We provide testing scripts to evaluate a whole dataset (Cityscapes, PASCAL VOC, ADE20k, etc.),
+and also some high-level APIs for easier integration into other projects.
+
+### Test a dataset
+
+- single GPU
+- CPU
+- single node multiple GPU
+- multiple node
+
+You can use the following commands to test a dataset.
+
+```shell
+# single-gpu testing
+python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--out ${RESULT_FILE}] [--eval ${EVAL_METRICS}] [--show]
+
+# CPU: if no GPU is available, the single-gpu testing command above runs on the CPU directly
+# CPU: if GPUs are available, disable them first, then run the single-gpu testing script
+export CUDA_VISIBLE_DEVICES=-1
+python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--out ${RESULT_FILE}] [--eval ${EVAL_METRICS}] [--show]
+
+# multi-gpu testing
+./tools/dist_test.sh ${CONFIG_FILE} ${CHECKPOINT_FILE} ${GPU_NUM} [--out ${RESULT_FILE}] [--eval ${EVAL_METRICS}]
+```
+
+Optional arguments:
+
+- `RESULT_FILE`: Filename of the output results in pickle format. If not specified, the results will not be saved to a file. (After mmseg v0.17, the output results become pre-evaluation results or format result paths)
+- `EVAL_METRICS`: Items to be evaluated on the results. Allowed values depend on the dataset, e.g., `mIoU` is available for all datasets. Cityscapes can be evaluated with the `cityscapes` metric as well as the standard `mIoU` metric.
+- `--show`: If specified, segmentation results will be plotted on the images and shown in a new window. It is only applicable to single GPU testing and used for debugging and visualization. Please make sure that a GUI is available in your environment, otherwise you may encounter an error like `cannot connect to X server`.
+- `--show-dir`: If specified, segmentation results will be plotted on the images and saved to the specified directory. It is only applicable to single GPU testing and used for debugging and visualization. You do NOT need a GUI available in your environment for using this option.
+- `--eval-options`: Optional parameters for `dataset.format_results` and `dataset.evaluate` during evaluation. When `efficient_test=True`, it will save intermediate results to local files to save CPU memory. Make sure that you have enough local storage space (more than 20GB). (The `efficient_test` argument has no effect after mmseg v0.17; we use a progressive mode to evaluate and format results, which largely saves memory cost and evaluation time.)
+
+Examples:
+
+Assume that you have already downloaded the checkpoints to the directory `checkpoints/`.
+
+1. Test PSPNet and visualize the results. Press any key for the next image.
+
+   ```shell
+   python tools/test.py configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \
+       checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth \
+       --show
+   ```
+
+2. Test PSPNet and save the painted images for later visualization.
+
+   ```shell
+   python tools/test.py configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \
+       checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth \
+       --show-dir psp_r50_512x1024_40ki_cityscapes_results
+   ```
+
+3. Test PSPNet on PASCAL VOC (without saving the test results) and evaluate the mIoU.
+
+   ```shell
+   python tools/test.py configs/pspnet/pspnet_r50-d8_512x1024_20k_voc12aug.py \
+       checkpoints/pspnet_r50-d8_512x1024_20k_voc12aug_20200605_003338-c57ef100.pth \
+       --eval mIoU
+   ```
+
+4. Test PSPNet with 4 GPUs, and evaluate the standard mIoU and cityscapes metric.
+
+   ```shell
+   ./tools/dist_test.sh configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \
+       checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth \
+       4 --out results.pkl --eval mIoU cityscapes
+   ```
+
+:::{note}
+There is some gap (~0.1%) between the cityscapes mIoU and our mIoU. The reason is that cityscapes averages each class by class size by default,
+while we use the simple version without averaging for all datasets.
+:::
+
+5. Test PSPNet on the cityscapes test split with 4 GPUs, and generate the png files to be submitted to the official evaluation server.
+
+   First, add the following to the config file `configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py`,
+
+   ```python
+   data = dict(
+       test=dict(
+           img_dir='leftImg8bit/test',
+           ann_dir='gtFine/test'))
+   ```
+
+   Then run the test.
+
+   ```shell
+   ./tools/dist_test.sh configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \
+       checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth \
+       4 --format-only --eval-options "imgfile_prefix=./pspnet_test_results"
+   ```
+
+   You will get png files under the `./pspnet_test_results` directory.
+   You may run `zip -r results.zip pspnet_test_results/` and submit the zip file to the [evaluation server](https://www.cityscapes-dataset.com/submit/).
+
+6. Test DeepLabV3+ on Cityscapes with low CPU memory usage (without saving the test results) and evaluate the mIoU.
+
+   ```shell
+   python tools/test.py \
+       configs/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes.py \
+       deeplabv3plus_r18-d8_512x1024_80k_cityscapes_20201226_080942-cff257fe.pth \
+       --eval-options efficient_test=True \
+       --eval mIoU
+   ```
+
+   Using `pmap` to view the CPU memory footprint, it used 2.25GB CPU memory with `efficient_test=True` and 11.06GB CPU memory with `efficient_test=False`. This optional parameter can save a lot of memory. (After mmseg v0.17, `efficient_test` has no effect; we use a progressive mode to evaluate and format results efficiently by default.)
+
+7. Test PSPNet on the LoveDA test split with 1 GPU, and generate the png files to be submitted to the official evaluation server.
+
+   First, add the following to the config file `configs/pspnet/pspnet_r50-d8_512x512_80k_loveda.py`,
+
+   ```python
+   data = dict(
+       test=dict(
+           img_dir='img_dir/test',
+           ann_dir='ann_dir/test'))
+   ```
+
+   Then run the test.
+
+   ```shell
+   python ./tools/test.py configs/pspnet/pspnet_r50-d8_512x512_80k_loveda.py \
+       checkpoints/pspnet_r50-d8_512x512_80k_loveda_20211104_155728-88610f9f.pth \
+       --format-only --eval-options "imgfile_prefix=./pspnet_test_results"
+   ```
+
+   You will get png files under the `./pspnet_test_results` directory.
+   You may run `zip -r -j Results.zip pspnet_test_results/` and submit the zip file to the [evaluation server](https://codalab.lisn.upsaclay.fr/competitions/421).
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/make.bat b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/make.bat
new file mode 100644
index 0000000..922152e
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.http://sphinx-doc.org/
+	exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/model_zoo.md b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/model_zoo.md
new file mode 100644
index 0000000..782a470
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/model_zoo.md
@@ -0,0 +1,186 @@
+# Benchmark and Model Zoo
+
+## Common settings
+
+- We use distributed training with 4 GPUs by default.
+
+- All pytorch-style pretrained backbones on ImageNet are trained by ourselves, with the same procedure as in the [paper](https://arxiv.org/pdf/1812.01187.pdf).
+  Our ResNet-style backbones are based on the ResNetV1c variant, where the 7x7 conv in the input stem is replaced with three 3x3 convs.
+
+- For consistency across different hardware, we report the GPU memory as the maximum value of `torch.cuda.max_memory_allocated()` over all 4 GPUs with `torch.backends.cudnn.benchmark=False`.
+  Note that this value is usually less than what `nvidia-smi` shows.
+
+- We report the inference time as the total time of network forwarding and post-processing, excluding the data loading time.
+  Results are obtained with the script `tools/benchmark.py`, which computes the average time on 200 images with `torch.backends.cudnn.benchmark=False`.
+
+- There are two inference modes in this framework.
+
+  - `slide` mode: The `test_cfg` will be like `dict(mode='slide', crop_size=(769, 769), stride=(513, 513))`.
+
+    In this mode, multiple patches are cropped from the input image and passed into the network individually.
+    The crop size and stride between patches are specified by `crop_size` and `stride`.
+    Overlapping areas are merged by averaging.
+
+  - `whole` mode: The `test_cfg` will be like `dict(mode='whole')`.
+
+    In this mode, the whole image is passed into the network directly.
+
+  By default, we use `slide` inference for models trained at 769x769, and `whole` inference for the rest.
+
+- For an input size of 8x+1 (e.g. 769), `align_corners=True` is adopted as a traditional practice.
+  Otherwise, for an input size of 8x (e.g. 512, 1024), `align_corners=False` is adopted.
+
+## Baselines
+
+### FCN
+
+Please refer to [FCN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn) for details.
+
+### PSPNet
+
+Please refer to [PSPNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet) for details.
+
+### DeepLabV3
+
+Please refer to [DeepLabV3](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3) for details.
+
+### PSANet
+
+Please refer to [PSANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet) for details.
+
+### DeepLabV3+
+
+Please refer to [DeepLabV3+](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus) for details.
+
+### UPerNet
+
+Please refer to [UPerNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet) for details.
+
+### NonLocal Net
+
+Please refer to [NonLocal Net](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nonlocal_net) for details.
+
+### EncNet
+
+Please refer to [EncNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet) for details.
+
+### CCNet
+
+Please refer to [CCNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet) for details.
+
+### DANet
+
+Please refer to [DANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet) for details.
+
+### APCNet
+
+Please refer to [APCNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet) for details.
+
+### HRNet
+
+Please refer to [HRNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet) for details.
+
+### GCNet
+
+Please refer to [GCNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet) for details.
+
+### DMNet
+
+Please refer to [DMNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet) for details.
+
+### ANN
+
+Please refer to [ANN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann) for details.
+
+### OCRNet
+
+Please refer to [OCRNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet) for details.
+
+### Fast-SCNN
+
+Please refer to [Fast-SCNN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastscnn) for details.
+
+### ResNeSt
+
+Please refer to [ResNeSt](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/resnest) for details.
+
+### Semantic FPN
+
+Please refer to [Semantic FPN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/sem_fpn) for details.
+
+### PointRend
+
+Please refer to [PointRend](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/point_rend) for details.
+
+### MobileNetV2
+
+Please refer to [MobileNetV2](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v2) for details.
+
+### MobileNetV3
+
+Please refer to [MobileNetV3](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v3) for details.
+
+### EMANet
+
+Please refer to [EMANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/emanet) for details.
+
+### DNLNet
+
+Please refer to [DNLNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet) for details.
+
+### CGNet
+
+Please refer to [CGNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/cgnet) for details.
+
+### Mixed Precision (FP16) Training
+
+Please refer to [Mixed Precision (FP16) Training on BiSeNetV2](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv2/bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes.py) for details.
+
+### U-Net
+
+Please refer to [U-Net](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/unet/README.md) for details.
+
+### ViT
+
+Please refer to [ViT](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/vit/README.md) for details.
+
+### Swin
+
+Please refer to [Swin](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/swin/README.md) for details.
+
+### SETR
+
+Please refer to [SETR](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/setr/README.md) for details.
+
+## Speed benchmark
+
+### Hardware
+
+- 8 NVIDIA Tesla V100 (32G) GPUs
+- Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz
+
+### Software environment
+
+- Python 3.7
+- PyTorch 1.5
+- CUDA 10.1
+- CUDNN 7.6.03
+- NCCL 2.4.08
+
+### Training speed
+
+For a fair comparison, we benchmark all implementations with ResNet-101V1c.
+The input size is fixed to 1024x512 with batch size 2.
+
+The training speed is reported below, in terms of seconds per iteration (s/iter). Lower is better.
+
+| Implementation                                                              | PSPNet (s/iter) | DeepLabV3+ (s/iter) |
+| ---------------------------------------------------------------------------- | --------------- | ------------------- |
+| [MMSegmentation](https://github.com/open-mmlab/mmsegmentation)              | **0.83**        | **0.85**            |
+| [SegmenTron](https://github.com/LikeLy-Journey/SegmenTron)                  | 0.84            | 0.85                |
+| [CSAILVision](https://github.com/CSAILVision/semantic-segmentation-pytorch) | 1.15            | N/A                 |
+| [vedaseg](https://github.com/Media-Smart/vedaseg)                           | 0.95            | 1.25                |
+
+:::{note}
+The output stride of DeepLabV3+ is 8.
+:::
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/stat.py b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/stat.py
new file mode 100644
index 0000000..1398a70
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/stat.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+# Copyright (c) OpenMMLab. All rights reserved.
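+# Collects model-zoo statistics: scans configs/*/README.md for checkpoint
+# links and paper-type markers, then writes the summary to
+# modelzoo_statistics.md (this script is run by the builder-inited hook
+# registered in conf.py).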
+import functools as func
+import glob
+import os.path as osp
+import re
+
+import numpy as np
+
+url_prefix = 'https://github.com/open-mmlab/mmsegmentation/blob/master/'
+
+files = sorted(glob.glob('../../configs/*/README.md'))
+
+stats = []
+titles = []
+num_ckpts = 0
+
+for f in files:
+    url = osp.dirname(f.replace('../../', url_prefix))
+
+    with open(f, 'r') as content_file:
+        content = content_file.read()
+
+    title = content.split('\n')[0].replace('#', '').strip()
+    ckpts = set(x.lower().strip()
+                for x in re.findall(r'https?://download.*\.pth', content)
+                if 'mmsegmentation' in x)
+    if len(ckpts) == 0:
+        continue
+
+    _papertype = [
+        x for x in re.findall(r'<!-- \[([A-Z]*?)\] -->', content)
+    ]
+    assert len(_papertype) > 0
+    papertype = _papertype[0]
+
+    paper = set([(papertype, title)])
+
+    titles.append(title)
+    num_ckpts += len(ckpts)
+    statsmsg = f"""
+\t* [{papertype}] [{title}]({url}) ({len(ckpts)} ckpts)
+"""
+    stats.append((paper, ckpts, statsmsg))
+
+allpapers = func.reduce(lambda a, b: a.union(b), [p for p, _, _ in stats])
+msglist = '\n'.join(x for _, _, x in stats)
+
+papertypes, papercounts = np.unique([t for t, _ in allpapers],
+                                    return_counts=True)
+countstr = '\n'.join(
+    [f' - {t}: {c}' for t, c in zip(papertypes, papercounts)])
+
+modelzoo = f"""
+# Model Zoo Statistics
+
+* Number of papers: {len(set(titles))}
+{countstr}
+
+* Number of checkpoints: {num_ckpts}
+{msglist}
+"""
+
+with open('modelzoo_statistics.md', 'w') as f:
+    f.write(modelzoo)
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/switch_language.md b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/switch_language.md
new file mode 100644
index 0000000..f58efc4
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/switch_language.md
@@ -0,0 +1,3 @@
+## English
+
+## 简体中文
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/train.md b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/train.md
new file mode 100644
index 0000000..7c1c411
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/train.md
@@ -0,0 +1,169 @@
+## Train a model
+
+MMSegmentation implements distributed training and non-distributed training,
+which use `MMDistributedDataParallel` and `MMDataParallel` respectively.
+
+All outputs (log files and checkpoints) will be saved to the working directory,
+which is specified by `work_dir` in the config file.
+
+By default we evaluate the model on the validation set after some iterations; you can change the evaluation interval by adding the `interval` argument in the training config.
+
+```python
+evaluation = dict(interval=4000)  # This evaluates the model every 4000 iterations.
+```
+
+**\*Important\***: The default learning rate in config files is for 4 GPUs and 2 img/gpu (batch size = 4x2 = 8).
+Equivalently, you may also use 8 GPUs and 1 img/gpu, since all models use cross-GPU SyncBN.
+
+To trade training speed for lower GPU memory usage, you may pass in `--cfg-options model.backbone.with_cp=True` to enable checkpointing in the backbone.
+
+### Train on a single machine
+
+#### Train with a single GPU
+
+official support:
+
+```shell
+sh tools/dist_train.sh ${CONFIG_FILE} 1 [optional arguments]
+```
+
+experimental support (converts SyncBN to BN):
+
+```shell
+python tools/train.py ${CONFIG_FILE} [optional arguments]
+```
+
+If you want to specify the working directory in the command, you can add the argument `--work-dir ${YOUR_WORK_DIR}`.
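+
+For example (the config path and directory name here are illustrative):
+
+```shell
+python tools/train.py configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py --work-dir ./work_dirs/my_pspnet_run
+```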
+
+#### Train with CPU
+
+The process of training on the CPU is the same as single-GPU training if the machine has no GPU. If the machine has GPUs but you do not want to use them, just disable the GPUs before the training process.
+
+```shell
+export CUDA_VISIBLE_DEVICES=-1
+```
+
+And then run the script [above](#train-with-a-single-gpu).
+
+```{warning}
+Training on CPU is much slower than training on GPU; this mode is mainly intended for debugging.
+```
+
+#### Train with multiple GPUs
+
+```shell
+sh tools/dist_train.sh ${CONFIG_FILE} ${GPU_NUM} [optional arguments]
+```
+
+Optional arguments are:
+
+- `--no-validate` (**not suggested**): By default, the codebase will perform evaluation every k iterations during training. To disable this behavior, use `--no-validate`.
+- `--work-dir ${WORK_DIR}`: Override the working directory specified in the config file.
+- `--resume-from ${CHECKPOINT_FILE}`: Resume from a previous checkpoint file (to continue the training process).
+- `--load-from ${CHECKPOINT_FILE}`: Load weights from a checkpoint file (to start finetuning for another task).
+- `--deterministic`: Switch on "deterministic" mode, which slows down training but makes the results reproducible.
+
+Difference between `resume-from` and `load-from`:
+
+- `resume-from` loads both the model weights and the optimizer state, including the iteration number.
+- `load-from` loads only the model weights and starts training from iteration 0.
+
+An example:
+
+```shell
+# checkpoints and logs saved in WORK_DIR=work_dirs/pspnet_r50-d8_512x512_80k_ade20k/
+# If work_dir is not set, it will be generated automatically.
+sh tools/dist_train.sh configs/pspnet/pspnet_r50-d8_512x512_80k_ade20k.py 8 --work-dir work_dirs/pspnet_r50-d8_512x512_80k_ade20k/ --deterministic
+```
+
+**Note**: During training, checkpoints and logs are saved in the same folder structure as the config file under `work_dirs/`. A custom work directory is not recommended, since evaluation scripts infer work directories from the config file name. If you want to save your weights somewhere else, please use a symlink, for example:
+
+```shell
+ln -s ${YOUR_WORK_DIRS} ${MMSEG}/work_dirs
+```
+
+#### Launch multiple jobs on a single machine
+
+If you launch multiple jobs on a single machine, e.g., 2 jobs of 4-GPU training on a machine with 8 GPUs, you need to specify different ports (29500 by default) for each job to avoid communication conflicts. Otherwise, there will be an error message saying `RuntimeError: Address already in use`.
+
+If you use `dist_train.sh` to launch training jobs, you can set the port in the commands with the environment variable `PORT`.
+
+```shell
+CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 sh tools/dist_train.sh ${CONFIG_FILE} 4
+CUDA_VISIBLE_DEVICES=4,5,6,7 PORT=29501 sh tools/dist_train.sh ${CONFIG_FILE} 4
+```
+
+### Train with multiple machines
+
+If you launch on multiple machines simply connected with Ethernet, you can run the following commands:
+
+On the first machine:
+
+```shell
+NNODES=2 NODE_RANK=0 PORT=$MASTER_PORT MASTER_ADDR=$MASTER_ADDR sh tools/dist_train.sh $CONFIG $GPUS
+```
+
+On the second machine:
+
+```shell
+NNODES=2 NODE_RANK=1 PORT=$MASTER_PORT MASTER_ADDR=$MASTER_ADDR sh tools/dist_train.sh $CONFIG $GPUS
+```
+
+Usually it is slow if you do not have high-speed networking like InfiniBand.
+
+### Manage jobs with Slurm
+
+Slurm is a good job scheduling system for computing clusters. On a cluster managed by Slurm, you can use `slurm_train.sh` to spawn training jobs.
It supports both single-node and multi-node training.
+
+Train with multiple machines:
+
+```shell
+[GPUS=${GPUS}] sh tools/slurm_train.sh ${PARTITION} ${JOB_NAME} ${CONFIG_FILE} --work-dir ${WORK_DIR}
+```
+
+Here is an example of using 16 GPUs to train PSPNet on the dev partition.
+
+```shell
+GPUS=16 sh tools/slurm_train.sh dev pspr50 configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py work_dirs/pspnet_r50-d8_512x1024_40k_cityscapes/
+```
+
+When using `slurm_train.sh` to start multiple tasks on a node, different ports need to be specified for each task. Three ways to do this are provided:
+
+Option 1:
+
+In `config1.py`:
+
+```python
+dist_params = dict(backend='nccl', port=29500)
+```
+
+In `config2.py`:
+
+```python
+dist_params = dict(backend='nccl', port=29501)
+```
+
+Then you can launch two jobs with config1.py and config2.py.
+
+```shell
+CUDA_VISIBLE_DEVICES=0,1,2,3 GPUS=4 sh tools/slurm_train.sh ${PARTITION} ${JOB_NAME} config1.py tmp_work_dir_1
+CUDA_VISIBLE_DEVICES=4,5,6,7 GPUS=4 sh tools/slurm_train.sh ${PARTITION} ${JOB_NAME} config2.py tmp_work_dir_2
+```
+
+Option 2:
+
+You can set different communication ports without modifying the configuration file, but you have to use `--cfg-options` to override the default port in the configuration file.
+
+```shell
+CUDA_VISIBLE_DEVICES=0,1,2,3 GPUS=4 sh tools/slurm_train.sh ${PARTITION} ${JOB_NAME} config1.py tmp_work_dir_1 --cfg-options dist_params.port=29500
+CUDA_VISIBLE_DEVICES=4,5,6,7 GPUS=4 sh tools/slurm_train.sh ${PARTITION} ${JOB_NAME} config2.py tmp_work_dir_2 --cfg-options dist_params.port=29501
+```
+
+Option 3:
+
+You can set the port in the command using the environment variable `MASTER_PORT`:
+
+```shell
+CUDA_VISIBLE_DEVICES=0,1,2,3 GPUS=4 MASTER_PORT=29500 sh tools/slurm_train.sh ${PARTITION} ${JOB_NAME} config1.py tmp_work_dir_1
+CUDA_VISIBLE_DEVICES=4,5,6,7 GPUS=4 MASTER_PORT=29501 sh tools/slurm_train.sh ${PARTITION} ${JOB_NAME} config2.py tmp_work_dir_2
+```
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/tutorials/config.md b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/tutorials/config.md
new file mode 100644
index 0000000..2db5469
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/tutorials/config.md
@@ -0,0 +1,381 @@
+# Tutorial 1: Learn about Configs
+
+We incorporate modular and inheritance design into our config system, which makes it convenient to conduct various experiments.
+If you wish to inspect the config file, you may run `python tools/print_config.py /PATH/TO/CONFIG` to see the complete config.
+You may also pass `--cfg-options xxx.yyy=zzz` to see the updated config.
+
+## Config File Structure
+
+There are 4 basic component types under `configs/_base_`: dataset, model, schedule, default_runtime.
+Many methods, such as DeepLabV3 and PSPNet, can be easily constructed with one component of each type.
+The configs that are composed of components from `_base_` are called _primitive_.
+
+For all configs under the same folder, it is recommended to have only **one** _primitive_ config. All other configs should inherit from the _primitive_ config. In this way, the maximum inheritance level is 3.
+
+For easy understanding, we recommend that contributors inherit from existing methods.
+For example, if some modification is made based on DeepLabV3, users may first inherit the basic DeepLabV3 structure by specifying `_base_ = ../deeplabv3/deeplabv3_r50_512x1024_40ki_cityscapes.py`, then modify the necessary fields in the config files.
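+
+A minimal sketch of such an inherited config (the `_base_` path follows the example above; the overridden values are illustrative):
+
+```python
+_base_ = '../deeplabv3/deeplabv3_r50_512x1024_40ki_cityscapes.py'
+
+# Override only the fields that differ from the primitive config;
+# everything else is inherited unchanged.
+model = dict(decode_head=dict(num_classes=21))
+data = dict(samples_per_gpu=4)
+```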
+ +If you are building an entirely new method that does not share the structure with any of the existing methods, you may create a folder `xxxnet` under `configs`, + +Please refer to [mmcv](https://mmcv.readthedocs.io/en/latest/understand_mmcv/config.html) for detailed documentation. + +## Config Name Style + +We follow the below style to name config files. Contributors are advised to follow the same style. + +``` +{model}_{backbone}_[misc]_[gpu x batch_per_gpu]_{resolution}_{iterations}_{dataset} +``` + +`{xxx}` is required field and `[yyy]` is optional. + +- `{model}`: model type like `psp`, `deeplabv3`, etc. +- `{backbone}`: backbone type like `r50` (ResNet-50), `x101` (ResNeXt-101). +- `[misc]`: miscellaneous setting/plugins of model, e.g. `dconv`, `gcb`, `attention`, `mstrain`. +- `[gpu x batch_per_gpu]`: GPUs and samples per GPU, `8x2` is used by default. +- `{iterations}`: number of training iterations like `160k`. +- `{dataset}`: dataset like `cityscapes`, `voc12aug`, `ade`. + +## An Example of PSPNet + +To help the users have a basic idea of a complete config and the modules in a modern semantic segmentation system, +we make brief comments on the config of PSPNet using ResNet50V1c as the following. +For more detailed usage and the corresponding alternative for each module, please refer to the API documentation. + +```python +norm_cfg = dict(type='SyncBN', requires_grad=True) # Segmentation usually uses SyncBN +model = dict( + type='EncoderDecoder', # Name of segmentor + pretrained='open-mmlab://resnet50_v1c', # The ImageNet pretrained backbone to be loaded + backbone=dict( + type='ResNetV1c', # The type of backbone. Please refer to mmseg/models/backbones/resnet.py for details. + depth=50, # Depth of backbone. Normally 50, 101 are used. + num_stages=4, # Number of stages of backbone. + out_indices=(0, 1, 2, 3), # The index of output feature maps produced in each stages. + dilations=(1, 1, 2, 4), # The dilation rate of each layer. + strides=(1, 2, 1, 1), # The stride of each layer. + norm_cfg=dict( # The configuration of norm layer. + type='SyncBN', # Type of norm layer. Usually it is SyncBN. + requires_grad=True), # Whether to train the gamma and beta in norm + norm_eval=False, # Whether to freeze the statistics in BN + style='pytorch', # The style of backbone, 'pytorch' means that stride 2 layers are in 3x3 conv, 'caffe' means stride 2 layers are in 1x1 convs. + contract_dilation=True), # When dilation > 1, whether contract first layer of dilation. + decode_head=dict( + type='PSPHead', # Type of decode head. Please refer to mmseg/models/decode_heads for available options. + in_channels=2048, # Input channel of decode head. + in_index=3, # The index of feature map to select. + channels=512, # The intermediate channels of decode head. + pool_scales=(1, 2, 3, 6), # The avg pooling scales of PSPHead. Please refer to paper for details. + dropout_ratio=0.1, # The dropout ratio before final classification layer. + num_classes=19, # Number of segmentation class. Usually 19 for cityscapes, 21 for VOC, 150 for ADE20k. + norm_cfg=dict(type='SyncBN', requires_grad=True), # The configuration of norm layer. + align_corners=False, # The align_corners argument for resize in decoding. + loss_decode=dict( # Config of loss function for the decode_head. + type='CrossEntropyLoss', # Type of loss used for segmentation. + use_sigmoid=False, # Whether use sigmoid activation for segmentation. + loss_weight=1.0)), # Loss weight of decode head. 
+ auxiliary_head=dict( + type='FCNHead', # Type of auxiliary head. Please refer to mmseg/models/decode_heads for available options. + in_channels=1024, # Input channel of auxiliary head. + in_index=2, # The index of feature map to select. + channels=256, # The intermediate channels of decode head. + num_convs=1, # Number of convs in FCNHead. It is usually 1 in auxiliary head. + concat_input=False, # Whether concat output of convs with input before classification layer. + dropout_ratio=0.1, # The dropout ratio before final classification layer. + num_classes=19, # Number of segmentation class. Usually 19 for cityscapes, 21 for VOC, 150 for ADE20k. + norm_cfg=dict(type='SyncBN', requires_grad=True), # The configuration of norm layer. + align_corners=False, # The align_corners argument for resize in decoding. + loss_decode=dict( # Config of loss function for the decode_head. + type='CrossEntropyLoss', # Type of loss used for segmentation. + use_sigmoid=False, # Whether use sigmoid activation for segmentation. + loss_weight=0.4))) # Loss weight of auxiliary head, which is usually 0.4 of decode head. +train_cfg = dict() # train_cfg is just a place holder for now. +test_cfg = dict(mode='whole') # The test mode, options are 'whole' and 'sliding'. 'whole': whole image fully-convolutional test. 'sliding': sliding crop window on the image. +dataset_type = 'CityscapesDataset' # Dataset type, this will be used to define the dataset. +data_root = 'data/cityscapes/' # Root path of data. +img_norm_cfg = dict( # Image normalization config to normalize the input images. + mean=[123.675, 116.28, 103.53], # Mean values used to pre-training the pre-trained backbone models. + std=[58.395, 57.12, 57.375], # Standard variance used to pre-training the pre-trained backbone models. + to_rgb=True) # The channel orders of image used to pre-training the pre-trained backbone models. +crop_size = (512, 1024) # The crop size during training. +train_pipeline = [ # Training pipeline. + dict(type='LoadImageFromFile'), # First pipeline to load images from file path. + dict(type='LoadAnnotations'), # Second pipeline to load annotations for current image. + dict(type='Resize', # Augmentation pipeline that resize the images and their annotations. + img_scale=(2048, 1024), # The largest scale of image. + ratio_range=(0.5, 2.0)), # The augmented scale range as ratio. + dict(type='RandomCrop', # Augmentation pipeline that randomly crop a patch from current image. + crop_size=(512, 1024), # The crop size of patch. + cat_max_ratio=0.75), # The max area ratio that could be occupied by single category. + dict( + type='RandomFlip', # Augmentation pipeline that flip the images and their annotations + flip_ratio=0.5), # The ratio or probability to flip + dict(type='PhotoMetricDistortion'), # Augmentation pipeline that distort current image with several photo metric methods. + dict( + type='Normalize', # Augmentation pipeline that normalize the input images + mean=[123.675, 116.28, 103.53], # These keys are the same of img_norm_cfg since the + std=[58.395, 57.12, 57.375], # keys of img_norm_cfg are used here as arguments + to_rgb=True), + dict(type='Pad', # Augmentation pipeline that pad the image to specified size. + size=(512, 1024), # The output size of padding. + pad_val=0, # The padding value for image. + seg_pad_val=255), # The padding value of 'gt_semantic_seg'. 
+ dict(type='DefaultFormatBundle'), # Default format bundle to gather data in the pipeline + dict(type='Collect', # Pipeline that decides which keys in the data should be passed to the segmentor + keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), # First pipeline to load images from file path + dict( + type='MultiScaleFlipAug', # An encapsulation that encapsulates the test time augmentations + img_scale=(2048, 1024), # Decides the largest scale for testing, used for the Resize pipeline + flip=False, # Whether to flip images during testing + transforms=[ + dict(type='Resize', # Use resize augmentation + keep_ratio=True), # Whether to keep the ratio between height and width, the img_scale set here will be suppressed by the img_scale set above. + dict(type='RandomFlip'), # Thought RandomFlip is added in pipeline, it is not used when flip=False + dict( + type='Normalize', # Normalization config, the values are from img_norm_cfg + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True), + dict(type='ImageToTensor', # Convert image to tensor + keys=['img']), + dict(type='Collect', # Collect pipeline that collect necessary keys for testing. + keys=['img']) + ]) +] +data = dict( + samples_per_gpu=2, # Batch size of a single GPU + workers_per_gpu=2, # Worker to pre-fetch data for each single GPU + train=dict( # Train dataset config + type='CityscapesDataset', # Type of dataset, refer to mmseg/datasets/ for details. + data_root='data/cityscapes/', # The root of dataset. + img_dir='leftImg8bit/train', # The image directory of dataset. + ann_dir='gtFine/train', # The annotation directory of dataset. + pipeline=[ # pipeline, this is passed by the train_pipeline created before. + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=(512, 1024), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True), + dict(type='Pad', size=(512, 1024), pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) + ]), + val=dict( # Validation dataset config + type='CityscapesDataset', + data_root='data/cityscapes/', + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=[ # Pipeline is passed by test_pipeline created before + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ]), + test=dict( + type='CityscapesDataset', + data_root='data/cityscapes/', + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ])) +log_config = dict( # config to register logger hook + interval=50, # Interval to print the 
log
+    hooks=[
+        # dict(type='TensorboardLoggerHook')  # The Tensorboard logger is also supported
+        dict(type='TextLoggerHook', by_epoch=False)
+    ])
+dist_params = dict(backend='nccl')  # Parameters to set up distributed training; the port can also be set.
+log_level = 'INFO'  # The level of logging.
+load_from = None  # Load a model as a pre-trained model from a given path. This will not resume training.
+resume_from = None  # Resume checkpoints from a given path; the training will be resumed from the iteration at which the checkpoint was saved.
+workflow = [('train', 1)]  # Workflow for runner. [('train', 1)] means there is only one workflow and the workflow named 'train' is executed once. The workflow trains the model for 40000 iterations according to `runner.max_iters`.
+cudnn_benchmark = True  # Whether to use cudnn_benchmark to speed up, which is fast for fixed input sizes.
+optimizer = dict(  # Config used to build the optimizer; supports all the optimizers in PyTorch with the same arguments as in PyTorch
+    type='SGD',  # Type of optimizer, refer to https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/optimizer/default_constructor.py#L13 for more details
+    lr=0.01,  # Learning rate of the optimizer, see detailed usage of the parameters in the PyTorch documentation
+    momentum=0.9,  # Momentum
+    weight_decay=0.0005)  # Weight decay of SGD
+optimizer_config = dict()  # Config used to build the optimizer hook, refer to https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/optimizer.py#L8 for implementation details.
+lr_config = dict(
+    policy='poly',  # The policy of the scheduler, also supports Step, CosineAnnealing, Cyclic, etc. Refer to the supported LrUpdaters at https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py#L9.
+    power=0.9,  # The power of polynomial decay.
+    min_lr=0.0001,  # The minimum learning rate to stabilize the training.
+    by_epoch=False)  # Whether to count by epoch or not.
+runner = dict(
+    type='IterBasedRunner',  # Type of runner to use (i.e. IterBasedRunner or EpochBasedRunner)
+    max_iters=40000)  # Total number of iterations. For EpochBasedRunner use `max_epochs`
+checkpoint_config = dict(  # Config to set the checkpoint hook, refer to https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/checkpoint.py for implementation.
+    by_epoch=False,  # Whether to count by epoch or not.
+    interval=4000)  # The save interval.
+evaluation = dict(  # The config to build the evaluation hook. Please refer to mmseg/core/evaluation/eval_hook.py for details.
+    interval=4000,  # The interval of evaluation.
+    metric='mIoU')  # The evaluation metric.
+```
+
+## FAQ
+
+### Ignore some fields in the base configs
+
+Sometimes, you may set `_delete_=True` to ignore some of the fields in the base configs.
+You may refer to [mmcv](https://mmcv.readthedocs.io/en/latest/understand_mmcv/config.html#inherit-from-base-config-with-ignored-fields) for a simple illustration.
+
+In MMSegmentation, for example, suppose you want to change the backbone of PSPNet, whose config is the following.
+
+```python
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    type='EncoderDecoder',
+    pretrained='torchvision://resnet50',
+    backbone=dict(
+        type='ResNetV1c',
+        depth=50,
+        num_stages=4,
+        out_indices=(0, 1, 2, 3),
+        dilations=(1, 1, 2, 4),
+        strides=(1, 2, 1, 1),
+        norm_cfg=norm_cfg,
+        norm_eval=False,
+        style='pytorch',
+        contract_dilation=True),
+    decode_head=dict(...),
+    auxiliary_head=dict(...))
+```
+
+`ResNet` and `HRNet` use different keywords for construction:
+
+```python
+_base_ = '../pspnet/psp_r50_512x1024_40k_cityscapes.py'
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+model = dict(
+    pretrained='open-mmlab://msra/hrnetv2_w32',
+    backbone=dict(
+        _delete_=True,
+        type='HRNet',
+        norm_cfg=norm_cfg,
+        extra=dict(
+            stage1=dict(
+                num_modules=1,
+                num_branches=1,
+                block='BOTTLENECK',
+                num_blocks=(4, ),
+                num_channels=(64, )),
+            stage2=dict(
+                num_modules=1,
+                num_branches=2,
+                block='BASIC',
+                num_blocks=(4, 4),
+                num_channels=(32, 64)),
+            stage3=dict(
+                num_modules=4,
+                num_branches=3,
+                block='BASIC',
+                num_blocks=(4, 4, 4),
+                num_channels=(32, 64, 128)),
+            stage4=dict(
+                num_modules=3,
+                num_branches=4,
+                block='BASIC',
+                num_blocks=(4, 4, 4, 4),
+                num_channels=(32, 64, 128, 256)))),
+    decode_head=dict(...),
+    auxiliary_head=dict(...))
+```
+
+The `_delete_=True` replaces all old keys in the `backbone` field with the new keys.
+
+### Use intermediate variables in configs
+
+Some intermediate variables are used in the config files, like `train_pipeline`/`test_pipeline` in datasets.
+It's worth noting that when modifying intermediate variables in the children configs, users need to pass the intermediate variables into the corresponding fields again.
+For example, suppose we would like to change the multi-scale strategy to train/test a PSPNet. `train_pipeline`/`test_pipeline` are the intermediate variables we would like to modify.
+
+```python
+_base_ = '../pspnet/psp_r50_512x1024_40k_cityscapes.py'
+crop_size = (512, 1024)
+img_norm_cfg = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadAnnotations'),
+    dict(type='Resize', img_scale=(2048, 1024), ratio_range=(1.0, 2.0)),  # change to [1., 2.]
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(2048, 1024),
+        img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],  # change to multi scale testing
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ])
+]
+data = dict(
+    train=dict(pipeline=train_pipeline),
+    val=dict(pipeline=test_pipeline),
+    test=dict(pipeline=test_pipeline))
+```
+
+We first define the new `train_pipeline`/`test_pipeline` and pass them into `data`.
+
+Similarly, if we would like to switch from `SyncBN` to `BN` or `MMSyncBN`, we need to substitute every `norm_cfg` in the config.
+
+```python
+_base_ = '../pspnet/psp_r50_512x1024_40k_cityscapes.py'
+norm_cfg = dict(type='BN', requires_grad=True)
+model = dict(
+    backbone=dict(norm_cfg=norm_cfg),
+    decode_head=dict(norm_cfg=norm_cfg),
+    auxiliary_head=dict(norm_cfg=norm_cfg))
+```
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/tutorials/customize_datasets.md b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/tutorials/customize_datasets.md
new file mode 100644
index 0000000..de906d5
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/tutorials/customize_datasets.md
@@ -0,0 +1,290 @@
+# Tutorial 2: Customize Datasets
+
+## Data configuration
+
+The `data` variable in the config file holds the data configuration and defines the arguments that are used to build datasets and dataloaders.
+
+Here is an example of data configuration:
+
+```python
+data = dict(
+    samples_per_gpu=4,
+    workers_per_gpu=4,
+    train=dict(
+        type='ADE20KDataset',
+        data_root='data/ade/ADEChallengeData2016',
+        img_dir='images/training',
+        ann_dir='annotations/training',
+        pipeline=train_pipeline),
+    val=dict(
+        type='ADE20KDataset',
+        data_root='data/ade/ADEChallengeData2016',
+        img_dir='images/validation',
+        ann_dir='annotations/validation',
+        pipeline=test_pipeline),
+    test=dict(
+        type='ADE20KDataset',
+        data_root='data/ade/ADEChallengeData2016',
+        img_dir='images/validation',
+        ann_dir='annotations/validation',
+        pipeline=test_pipeline))
+```
+
+- `train`, `val` and `test`: The [`config`](https://github.com/open-mmlab/mmcv/blob/master/docs/en/understand_mmcv/config.md)s used to build dataset instances for model training, validation and testing through the
+  [`build and registry`](https://github.com/open-mmlab/mmcv/blob/master/docs/en/understand_mmcv/registry.md) mechanism.
+
+- `samples_per_gpu`: How many samples per batch and per gpu to load during model training; the `batch_size` of training is equal to `samples_per_gpu` times the gpu number, e.g. when using 8 gpus for distributed data parallel training and `samples_per_gpu=4`, the `batch_size` is `8*4=16`.
+  If you would like to define `batch_size` for testing and validation, please use `test_dataloader` and
+  `val_dataloader` with mmseg >=0.24.1.
+
+- `workers_per_gpu`: How many subprocesses per gpu to use for data loading. `0` means that the data will be loaded in the main process.
+
+**Note:** `samples_per_gpu` only works for model training; the default setting of `samples_per_gpu` is 1 in mmseg for model testing and validation (batch inference is NOT supported yet).
+
+**Note:** before v0.24.1, except `train`, `val`, `test`, `samples_per_gpu` and `workers_per_gpu`, the other keys in `data` must be the
+input keyword arguments for `dataloader` in pytorch, and the dataloaders used for model training, validation and testing have the same input arguments.
+In v0.24.1, mmseg supports using `train_dataloader`, `test_dataloader` and `val_dataloader` to specify different keyword arguments; the overall argument definition is still supported, but the specific dataloader settings have a higher priority.
+
+Here is an example of specific dataloader settings:
+
+```python
+data = dict(
+    samples_per_gpu=4,
+    workers_per_gpu=4,
+    shuffle=True,
+    train=dict(type='xxx', ...),
+    val=dict(type='xxx', ...),
+    test=dict(type='xxx', ...),
+    # Use different batch size during validation and testing.
+ val_dataloader=dict(samples_per_gpu=1, workers_per_gpu=4, shuffle=False), + test_dataloader=dict(samples_per_gpu=1, workers_per_gpu=4, shuffle=False)) +``` + +Assume only one gpu used for model training and testing, as the priority of the overall arguments definition is low, the batch_size +for training is `4` and dataset will be shuffled, and batch_size for testing and validation is `1`, and dataset will not be shuffled. + +To make data configuration much clearer, we recommend use specific dataloader setting instead of overall dataloader setting after v0.24.1, just like: + +```python +data = dict( + train=dict(type='xxx', ...), + val=dict(type='xxx', ...), + test=dict(type='xxx', ...), + # Use specific dataloader setting + train_dataloader=dict(samples_per_gpu=4, workers_per_gpu=4, shuffle=True), + val_dataloader=dict(samples_per_gpu=1, workers_per_gpu=4, shuffle=False), + test_dataloader=dict(samples_per_gpu=1, workers_per_gpu=4, shuffle=False)) +``` + +**Note:** in model training, default values in the script of mmseg for dataloader are `shuffle=True, and drop_last=True`, +in model validation and testing, default values are `shuffle=False, and drop_last=False` + +## Customize datasets by reorganizing data + +The simplest way is to convert your dataset to organize your data into folders. + +An example of file structure is as followed. + +```none +├── data +│ ├── my_dataset +│ │ ├── img_dir +│ │ │ ├── train +│ │ │ │ ├── xxx{img_suffix} +│ │ │ │ ├── yyy{img_suffix} +│ │ │ │ ├── zzz{img_suffix} +│ │ │ ├── val +│ │ ├── ann_dir +│ │ │ ├── train +│ │ │ │ ├── xxx{seg_map_suffix} +│ │ │ │ ├── yyy{seg_map_suffix} +│ │ │ │ ├── zzz{seg_map_suffix} +│ │ │ ├── val + +``` + +A training pair will consist of the files with same suffix in img_dir/ann_dir. + +If `split` argument is given, only part of the files in img_dir/ann_dir will be loaded. +We may specify the prefix of files we would like to be included in the split txt. + +More specifically, for a split txt like following, + +```none +xxx +zzz +``` + +Only +`data/my_dataset/img_dir/train/xxx{img_suffix}`, +`data/my_dataset/img_dir/train/zzz{img_suffix}`, +`data/my_dataset/ann_dir/train/xxx{seg_map_suffix}`, +`data/my_dataset/ann_dir/train/zzz{seg_map_suffix}` will be loaded. + +:::{note} +The annotations are images of shape (H, W), the value pixel should fall in range `[0, num_classes - 1]`. +You may use `'P'` mode of [pillow](https://pillow.readthedocs.io/en/stable/handbook/concepts.html#palette) to create your annotation image with color. +::: + +## Customize datasets by mixing dataset + +MMSegmentation also supports to mix dataset for training. +Currently it supports to concat, repeat and multi-image mix datasets. + +### Repeat dataset + +We use `RepeatDataset` as wrapper to repeat the dataset. +For example, suppose the original dataset is `Dataset_A`, to repeat it, the config looks like the following + +```python +dataset_A_train = dict( + type='RepeatDataset', + times=N, + dataset=dict( # This is the original config of Dataset_A + type='Dataset_A', + ... + pipeline=train_pipeline + ) + ) +``` + +### Concatenate dataset + +There 2 ways to concatenate the dataset. + +1. If the datasets you want to concatenate are in the same type with different annotation files, + you can concatenate the dataset configs like the following. + + 1. You may concatenate two `ann_dir`. + + ```python + dataset_A_train = dict( + type='Dataset_A', + img_dir = 'img_dir', + ann_dir = ['anno_dir_1', 'anno_dir_2'], + pipeline=train_pipeline + ) + ``` + + 2. 
You may concatenate two `split`. + + ```python + dataset_A_train = dict( + type='Dataset_A', + img_dir = 'img_dir', + ann_dir = 'anno_dir', + split = ['split_1.txt', 'split_2.txt'], + pipeline=train_pipeline + ) + ``` + + 3. You may concatenate two `ann_dir` and `split` simultaneously. + + ```python + dataset_A_train = dict( + type='Dataset_A', + img_dir = 'img_dir', + ann_dir = ['anno_dir_1', 'anno_dir_2'], + split = ['split_1.txt', 'split_2.txt'], + pipeline=train_pipeline + ) + ``` + + In this case, `ann_dir_1` and `ann_dir_2` are corresponding to `split_1.txt` and `split_2.txt`. + +2. In case the dataset you want to concatenate is different, you can concatenate the dataset configs like the following. + + ```python + dataset_A_train = dict() + dataset_B_train = dict() + + data = dict( + imgs_per_gpu=2, + workers_per_gpu=2, + train = [ + dataset_A_train, + dataset_B_train + ], + val = dataset_A_val, + test = dataset_A_test + ) + ``` + +A more complex example that repeats `Dataset_A` and `Dataset_B` by N and M times, respectively, and then concatenates the repeated datasets is as the following. + +```python +dataset_A_train = dict( + type='RepeatDataset', + times=N, + dataset=dict( + type='Dataset_A', + ... + pipeline=train_pipeline + ) +) +dataset_A_val = dict( + ... + pipeline=test_pipeline +) +dataset_A_test = dict( + ... + pipeline=test_pipeline +) +dataset_B_train = dict( + type='RepeatDataset', + times=M, + dataset=dict( + type='Dataset_B', + ... + pipeline=train_pipeline + ) +) +data = dict( + imgs_per_gpu=2, + workers_per_gpu=2, + train = [ + dataset_A_train, + dataset_B_train + ], + val = dataset_A_val, + test = dataset_A_test +) + +``` + +### Multi-image Mix Dataset + +We use `MultiImageMixDataset` as a wrapper to mix images from multiple datasets. +`MultiImageMixDataset` can be used by multiple images mixed data augmentation +like mosaic and mixup. + +An example of using `MultiImageMixDataset` with `Mosaic` data augmentation: + +```python +train_pipeline = [ + dict(type='RandomMosaic', prob=1), + dict(type='Resize', img_scale=(1024, 512), keep_ratio=True), + dict(type='RandomFlip', prob=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] + +train_dataset = dict( + type='MultiImageMixDataset', + dataset=dict( + classes=classes, + palette=palette, + type=dataset_type, + reduce_zero_label=False, + img_dir=data_root + "images/train", + ann_dir=data_root + "annotations/train", + pipeline=[ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + ] + ), + pipeline=train_pipeline +) + +``` diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/tutorials/customize_models.md b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/tutorials/customize_models.md new file mode 100644 index 0000000..f637fd6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/tutorials/customize_models.md @@ -0,0 +1,234 @@ +# Tutorial 4: Customize Models + +## Customize optimizer + +Assume you want to add a optimizer named as `MyOptimizer`, which has arguments `a`, `b`, and `c`. 
+You need to first implement the new optimizer in a file, e.g., in `mmseg/core/optimizer/my_optimizer.py`: + +```python +from mmcv.runner import OPTIMIZERS +from torch.optim import Optimizer + + +@OPTIMIZERS.register_module +class MyOptimizer(Optimizer): + + def __init__(self, a, b, c) + +``` + +Then add this module in `mmseg/core/optimizer/__init__.py` thus the registry will +find the new module and add it: + +```python +from .my_optimizer import MyOptimizer +``` + +Then you can use `MyOptimizer` in `optimizer` field of config files. +In the configs, the optimizers are defined by the field `optimizer` like the following: + +```python +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +``` + +To use your own optimizer, the field can be changed as + +```python +optimizer = dict(type='MyOptimizer', a=a_value, b=b_value, c=c_value) +``` + +We already support to use all the optimizers implemented by PyTorch, and the only modification is to change the `optimizer` field of config files. +For example, if you want to use `ADAM`, though the performance will drop a lot, the modification could be as the following. + +```python +optimizer = dict(type='Adam', lr=0.0003, weight_decay=0.0001) +``` + +The users can directly set arguments following the [API doc](https://pytorch.org/docs/stable/optim.html?highlight=optim#module-torch.optim) of PyTorch. + +## Customize optimizer constructor + +Some models may have some parameter-specific settings for optimization, e.g. weight decay for BatchNoarm layers. +The users can do those fine-grained parameter tuning through customizing optimizer constructor. + +``` +from mmcv.utils import build_from_cfg + +from mmcv.runner import OPTIMIZER_BUILDERS +from .cocktail_optimizer import CocktailOptimizer + + +@OPTIMIZER_BUILDERS.register_module +class CocktailOptimizerConstructor(object): + + def __init__(self, optimizer_cfg, paramwise_cfg=None): + + def __call__(self, model): + + return my_optimizer + +``` + +## Develop new components + +There are mainly 2 types of components in MMSegmentation. + +- backbone: usually stacks of convolutional network to extract feature maps, e.g., ResNet, HRNet. +- head: the component for semantic segmentation map decoding. + +### Add new backbones + +Here we show how to develop new components with an example of MobileNet. + +1. Create a new file `mmseg/models/backbones/mobilenet.py`. + +```python +import torch.nn as nn + +from ..registry import BACKBONES + + +@BACKBONES.register_module +class MobileNet(nn.Module): + + def __init__(self, arg1, arg2): + pass + + def forward(self, x): # should return a tuple + pass + + def init_weights(self, pretrained=None): + pass +``` + +2. Import the module in `mmseg/models/backbones/__init__.py`. + +```python +from .mobilenet import MobileNet +``` + +3. Use it in your config file. + +```python +model = dict( + ... + backbone=dict( + type='MobileNet', + arg1=xxx, + arg2=xxx), + ... +``` + +### Add new heads + +In MMSegmentation, we provide a base [BaseDecodeHead](https://github.com/open-mmlab/mmsegmentation/blob/master/mmseg/models/decode_heads/decode_head.py) for all segmentation head. +All newly implemented decode heads should be derived from it. +Here we show how to develop a new head with the example of [PSPNet](https://arxiv.org/abs/1612.01105) as the following. + +First, add a new decode head in `mmseg/models/decode_heads/psp_head.py`. +PSPNet implements a decode head for segmentation decode. 
+To implement a decode head, basically we need to implement three functions of the new module as the following. + +```python +@HEADS.register_module() +class PSPHead(BaseDecodeHead): + + def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): + super(PSPHead, self).__init__(**kwargs) + + def init_weights(self): + + def forward(self, inputs): + +``` + +Next, the users need to add the module in the `mmseg/models/decode_heads/__init__.py` thus the corresponding registry could find and load them. + +To config file of PSPNet is as the following + +```python +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='pretrain_model/resnet50_v1c_trick-2cccc1ad.pth', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='PSPHead', + in_channels=2048, + in_index=3, + channels=512, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))) + +``` + +### Add new loss + +Assume you want to add a new loss as `MyLoss` for segmentation decode. +To add a new loss function, the users need implement it in `mmseg/models/losses/my_loss.py`. +The decorator `weighted_loss` enable the loss to be weighted for each element. + +```python +import torch +import torch.nn as nn + +from ..builder import LOSSES +from .utils import weighted_loss + +@weighted_loss +def my_loss(pred, target): + assert pred.size() == target.size() and target.numel() > 0 + loss = torch.abs(pred - target) + return loss + +@LOSSES.register_module +class MyLoss(nn.Module): + + def __init__(self, reduction='mean', loss_weight=1.0): + super(MyLoss, self).__init__() + self.reduction = reduction + self.loss_weight = loss_weight + + def forward(self, + pred, + target, + weight=None, + avg_factor=None, + reduction_override=None): + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + loss = self.loss_weight * my_loss( + pred, target, weight, reduction=reduction, avg_factor=avg_factor) + return loss +``` + +Then the users need to add it in the `mmseg/models/losses/__init__.py`. + +```python +from .my_loss import MyLoss, my_loss + +``` + +To use it, modify the `loss_xxx` field. +Then you need to modify the `loss_decode` field in the head. +`loss_weight` could be used to balance multiple losses. + +```python +loss_decode=dict(type='MyLoss', loss_weight=1.0)) +``` diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/tutorials/customize_runtime.md b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/tutorials/customize_runtime.md new file mode 100644 index 0000000..72ed770 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/tutorials/customize_runtime.md @@ -0,0 +1,245 @@ +# Tutorial 6: Customize Runtime Settings + +## Customize optimization settings + +### Customize optimizer supported by Pytorch + +We already support to use all the optimizers implemented by PyTorch, and the only modification is to change the `optimizer` field of config files. +For example, if you want to use `ADAM` (note that the performance could drop a lot), the modification could be as the following. 
+ +```python +optimizer = dict(type='Adam', lr=0.0003, weight_decay=0.0001) +``` + +To modify the learning rate of the model, the users only need to modify the `lr` in the config of optimizer. The users can directly set arguments following the [API doc](https://pytorch.org/docs/stable/optim.html?highlight=optim#module-torch.optim) of PyTorch. + +### Customize self-implemented optimizer + +#### 1. Define a new optimizer + +A customized optimizer could be defined as following. + +Assume you want to add a optimizer named `MyOptimizer`, which has arguments `a`, `b`, and `c`. +You need to create a new directory named `mmseg/core/optimizer`. +And then implement the new optimizer in a file, e.g., in `mmseg/core/optimizer/my_optimizer.py`: + +```python +from .registry import OPTIMIZERS +from torch.optim import Optimizer + + +@OPTIMIZERS.register_module() +class MyOptimizer(Optimizer): + + def __init__(self, a, b, c) + +``` + +#### 2. Add the optimizer to registry + +To find the above module defined above, this module should be imported into the main namespace at first. There are two options to achieve it. + +- Modify `mmseg/core/optimizer/__init__.py` to import it. + + The newly defined module should be imported in `mmseg/core/optimizer/__init__.py` so that the registry will + find the new module and add it: + +```python +from .my_optimizer import MyOptimizer +``` + +- Use `custom_imports` in the config to manually import it + +```python +custom_imports = dict(imports=['mmseg.core.optimizer.my_optimizer'], allow_failed_imports=False) +``` + +The module `mmseg.core.optimizer.my_optimizer` will be imported at the beginning of the program and the class `MyOptimizer` is then automatically registered. +Note that only the package containing the class `MyOptimizer` should be imported. +`mmseg.core.optimizer.my_optimizer.MyOptimizer` **cannot** be imported directly. + +Actually users can use a totally different file directory structure using this importing method, as long as the module root can be located in `PYTHONPATH`. + +#### 3. Specify the optimizer in the config file + +Then you can use `MyOptimizer` in `optimizer` field of config files. +In the configs, the optimizers are defined by the field `optimizer` like the following: + +```python +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +``` + +To use your own optimizer, the field can be changed to + +```python +optimizer = dict(type='MyOptimizer', a=a_value, b=b_value, c=c_value) +``` + +### Customize optimizer constructor + +Some models may have some parameter-specific settings for optimization, e.g. weight decay for BatchNorm layers. +The users can do those fine-grained parameter tuning through customizing optimizer constructor. + +```python +from mmcv.utils import build_from_cfg + +from mmcv.runner.optimizer import OPTIMIZER_BUILDERS, OPTIMIZERS +from mmseg.utils import get_root_logger +from .my_optimizer import MyOptimizer + + +@OPTIMIZER_BUILDERS.register_module() +class MyOptimizerConstructor(object): + + def __init__(self, optimizer_cfg, paramwise_cfg=None): + + def __call__(self, model): + + return my_optimizer + +``` + +The default optimizer constructor is implemented [here](https://github.com/open-mmlab/mmcv/blob/9ecd6b0d5ff9d2172c49a182eaa669e9f27bb8e7/mmcv/runner/optimizer/default_constructor.py#L11), which could also serve as a template for new optimizer constructor. 
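+
+As a concrete sketch of what such a constructor can do (this class and its name are illustrative, not part of the codebase), the following disables weight decay for bias and normalization parameters while leaving the rest of the optimizer config untouched:
+
+```python
+from mmcv.utils import build_from_cfg
+from mmcv.runner.optimizer import OPTIMIZER_BUILDERS, OPTIMIZERS
+
+
+@OPTIMIZER_BUILDERS.register_module()
+class NoDecayNormConstructor(object):
+    """Hypothetical constructor: no weight decay on 1-D parameters."""
+
+    def __init__(self, optimizer_cfg, paramwise_cfg=None):
+        self.optimizer_cfg = optimizer_cfg
+
+    def __call__(self, model):
+        cfg = self.optimizer_cfg.copy()
+        base_wd = cfg.get('weight_decay', 0.)
+        decay, no_decay = [], []
+        for param in model.parameters():
+            # Biases and norm-layer weights/biases are 1-D tensors.
+            (no_decay if param.dim() <= 1 else decay).append(param)
+        cfg['params'] = [
+            dict(params=decay, weight_decay=base_wd),
+            dict(params=no_decay, weight_decay=0.),
+        ]
+        return build_from_cfg(cfg, OPTIMIZERS)
+```
+
+It could then be selected in the config, e.g. `optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005, constructor='NoDecayNormConstructor')`, since the `constructor` key tells mmcv's `build_optimizer` which registered constructor to use.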
+ +### Additional settings + +Tricks not implemented by the optimizer should be implemented through optimizer constructor (e.g., set parameter-wise learning rates) or hooks. We list some common settings that could stabilize the training or accelerate the training. Feel free to create PR, issue for more settings. + +- __Use gradient clip to stabilize training__: + Some models need gradient clip to clip the gradients to stabilize the training process. An example is as below: + + ```python + optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) + ``` + + If your config inherits the base config which already sets the `optimizer_config`, you might need `_delete_=True` to override the unnecessary settings. See the [config documentation](https://mmsegmentation.readthedocs.io/en/latest/config.html) for more details. + +- __Use momentum schedule to accelerate model convergence__: + We support momentum scheduler to modify model's momentum according to learning rate, which could make the model converge in a faster way. + Momentum scheduler is usually used with LR scheduler, for example, the following config is used in 3D detection to accelerate convergence. + For more details, please refer to the implementation of [CyclicLrUpdater](https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327) and [CyclicMomentumUpdater](https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130). + + ```python + lr_config = dict( + policy='cyclic', + target_ratio=(10, 1e-4), + cyclic_times=1, + step_ratio_up=0.4, + ) + momentum_config = dict( + policy='cyclic', + target_ratio=(0.85 / 0.95, 1), + cyclic_times=1, + step_ratio_up=0.4, + ) + ``` + +## Customize training schedules + +By default we use step learning rate with 40k/80k schedule, this calls [`PolyLrUpdaterHook`](https://github.com/open-mmlab/mmcv/blob/826d3a7b68596c824fa1e2cb89b6ac274f52179c/mmcv/runner/hooks/lr_updater.py#L196) in MMCV. +We support many other learning rate schedule [here](https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py), such as `CosineAnnealing` and `Poly` schedule. Here are some examples + +- Step schedule: + + ```python + lr_config = dict(policy='step', step=[9, 10]) + ``` + +- ConsineAnnealing schedule: + + ```python + lr_config = dict( + policy='CosineAnnealing', + warmup='linear', + warmup_iters=1000, + warmup_ratio=1.0 / 10, + min_lr_ratio=1e-5) + ``` + +## Customize workflow + +Workflow is a list of (phase, epochs) to specify the running order and epochs. +By default it is set to be + +```python +workflow = [('train', 1)] +``` + +which means running 1 epoch for training. +Sometimes user may want to check some metrics (e.g. loss, accuracy) about the model on the validate set. +In such case, we can set the workflow as + +```python +[('train', 1), ('val', 1)] +``` + +so that 1 epoch for training and 1 epoch for validation will be run iteratively. + +:::{note} + +1. The parameters of model will not be updated during val epoch. +2. Keyword `total_epochs` in the config only controls the number of training epochs and will not affect the validation workflow. +3. Workflows `[('train', 1), ('val', 1)]` and `[('train', 1)]` will not change the behavior of `EvalHook` because `EvalHook` is called by `after_train_epoch` and validation workflow only affect hooks that are called through `after_val_epoch`. 
Therefore, the only difference between `[('train', 1), ('val', 1)]` and `[('train', 1)]` is that the runner will calculate losses on the validation set after each training epoch.
+
+:::
+
+## Customize hooks
+
+### Use hooks implemented in MMCV
+
+If the hook is already implemented in MMCV, you can directly modify the config to use the hook as below
+
+```python
+custom_hooks = [
+    dict(type='MyHook', a=a_value, b=b_value, priority='NORMAL')
+]
+```
+
+### Modify default runtime hooks
+
+There are some common hooks that are not registered through `custom_hooks`, they are
+
+- log_config
+- checkpoint_config
+- evaluation
+- lr_config
+- optimizer_config
+- momentum_config
+
+Among these hooks, only the logger hook has the `VERY_LOW` priority; the others have the `NORMAL` priority.
+The above-mentioned tutorials already cover how to modify `optimizer_config`, `momentum_config`, and `lr_config`.
+Here we reveal what we can do with `log_config`, `checkpoint_config`, and `evaluation`.
+
+#### Checkpoint config
+
+The MMCV runner will use `checkpoint_config` to initialize [`CheckpointHook`](https://github.com/open-mmlab/mmcv/blob/9ecd6b0d5ff9d2172c49a182eaa669e9f27bb8e7/mmcv/runner/hooks/checkpoint.py#L9).
+
+```python
+checkpoint_config = dict(interval=1)
+```
+
+The users could set `max_keep_ckpts` to save only a small number of checkpoints, or decide whether to store the state dict of the optimizer by `save_optimizer`. More details of the arguments are [here](https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.CheckpointHook).
+
+#### Log config
+
+The `log_config` wraps multiple logger hooks and enables setting their intervals. Now MMCV supports `WandbLoggerHook`, `MlflowLoggerHook`, and `TensorboardLoggerHook`.
+Detailed usage can be found in the [doc](https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook).
+
+```python
+log_config = dict(
+    interval=50,
+    hooks=[
+        dict(type='TextLoggerHook'),
+        dict(type='TensorboardLoggerHook')
+    ])
+```
+
+#### Evaluation config
+
+The config of `evaluation` will be used to initialize the [`EvalHook`](https://github.com/open-mmlab/mmsegmentation/blob/e3f6f655d69b777341aec2fe8829871cc0beadcb/mmseg/core/evaluation/eval_hooks.py#L7).
+Except for the key `interval`, other arguments such as `metric` will be passed to `dataset.evaluate()`.
+
+```python
+evaluation = dict(interval=1, metric='mIoU')
+```
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/tutorials/data_pipeline.md b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/tutorials/data_pipeline.md
new file mode 100644
index 0000000..ffa5855
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/tutorials/data_pipeline.md
@@ -0,0 +1,171 @@
+# Tutorial 3: Customize Data Pipelines
+
+## Design of Data pipelines
+
+Following typical conventions, we use `Dataset` and `DataLoader` for data loading
+with multiple workers. `Dataset` returns a dict of data items corresponding to
+the arguments of the models' forward method.
+Since the data in semantic segmentation may not be the same size,
+we introduce a new `DataContainer` type in MMCV to help collect and distribute
+data of different sizes.
+See [here](https://github.com/open-mmlab/mmcv/blob/master/mmcv/parallel/data_container.py) for more details.
+
+The data preparation pipeline and the dataset are decoupled. Usually a dataset
+defines how to process the annotations and a data pipeline defines all the steps to prepare a data dict.
+A pipeline consists of a sequence of operations.
Each operation takes a dict as input and also output a dict for the next transform. + +The operations are categorized into data loading, pre-processing, formatting and test-time augmentation. + +Here is an pipeline example for PSPNet. + +```python +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 1024) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +``` + +For each operation, we list the related dict fields that are added/updated/removed. + +### Data loading + +`LoadImageFromFile` + +- add: img, img_shape, ori_shape + +`LoadAnnotations` + +- add: gt_semantic_seg, seg_fields + +### Pre-processing + +`Resize` + +- add: scale, scale_idx, pad_shape, scale_factor, keep_ratio +- update: img, img_shape, \*seg_fields + +`RandomFlip` + +- add: flip +- update: img, \*seg_fields + +`Pad` + +- add: pad_fixed_size, pad_size_divisor +- update: img, pad_shape, \*seg_fields + +`RandomCrop` + +- update: img, pad_shape, \*seg_fields + +`Normalize` + +- add: img_norm_cfg +- update: img + +`SegRescale` + +- update: gt_semantic_seg + +`PhotoMetricDistortion` + +- update: img + +### Formatting + +`ToTensor` + +- update: specified by `keys`. + +`ImageToTensor` + +- update: specified by `keys`. + +`Transpose` + +- update: specified by `keys`. + +`ToDataContainer` + +- update: specified by `fields`. + +`DefaultFormatBundle` + +- update: img, gt_semantic_seg + +`Collect` + +- add: img_meta (the keys of img_meta is specified by `meta_keys`) +- remove: all other keys except for those specified by `keys` + +### Test time augmentation + +`MultiScaleFlipAug` + +## Extend and use custom pipelines + +1. Write a new pipeline in any file, e.g., `my_pipeline.py`. It takes a dict as input and return a dict. + + ```python + from mmseg.datasets import PIPELINES + + @PIPELINES.register_module() + class MyTransform: + + def __call__(self, results): + results['dummy'] = True + return results + ``` + +2. Import the new class. + + ```python + from .my_pipeline import MyTransform + ``` + +3. Use it in config files. 
+
+   ```python
+   img_norm_cfg = dict(
+       mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+   crop_size = (512, 1024)
+   train_pipeline = [
+       dict(type='LoadImageFromFile'),
+       dict(type='LoadAnnotations'),
+       dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)),
+       dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+       dict(type='RandomFlip', flip_ratio=0.5),
+       dict(type='PhotoMetricDistortion'),
+       dict(type='Normalize', **img_norm_cfg),
+       dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
+       dict(type='MyTransform'),
+       dict(type='DefaultFormatBundle'),
+       dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+   ]
+   ```
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/tutorials/index.rst b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/tutorials/index.rst
new file mode 100644
index 0000000..e1a67a8
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/tutorials/index.rst
@@ -0,0 +1,9 @@
+.. toctree::
+   :maxdepth: 2
+
+   config.md
+   customize_datasets.md
+   data_pipeline.md
+   customize_models.md
+   training_tricks.md
+   customize_runtime.md
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/tutorials/training_tricks.md b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/tutorials/training_tricks.md
new file mode 100644
index 0000000..d40de3d
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/tutorials/training_tricks.md
@@ -0,0 +1,90 @@
+# Tutorial 5: Training Tricks
+
+MMSegmentation supports the following training tricks out of the box.
+
+## Different Learning Rate (LR) for Backbone and Heads
+
+In semantic segmentation, some methods make the LR of the heads larger than that of the backbone to achieve better performance or faster convergence.
+
+In MMSegmentation, you may add the following lines to the config to make the LR of the heads 10 times that of the backbone.
+
+```python
+optimizer = dict(
+    paramwise_cfg = dict(
+        custom_keys={
+            'head': dict(lr_mult=10.)}))
+```
+
+With this modification, the LR of any parameter group with `'head'` in its name will be multiplied by 10.
+You may refer to the [MMCV doc](https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.DefaultOptimizerConstructor) for further details.
+
+## Online Hard Example Mining (OHEM)
+
+We implement a pixel sampler [here](https://github.com/open-mmlab/mmsegmentation/tree/master/mmseg/core/seg/sampler) for training-time sampling.
+Here is an example config for training PSPNet with OHEM enabled.
+
+```python
+_base_ = './pspnet_r50-d8_512x1024_40k_cityscapes.py'
+model = dict(
+    decode_head=dict(
+        sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=100000)))
+```
+
+In this way, only pixels with a confidence score under 0.7 are used for training, and at least 100000 pixels are kept during training. If `thresh` is not specified, the pixels with the top `min_kept` losses are selected.
+
+## Class Balanced Loss
+
+For datasets with an unbalanced class distribution, you may change the loss weight of each class.
+Here is an example for the Cityscapes dataset.
+
+```python
+_base_ = './pspnet_r50-d8_512x1024_40k_cityscapes.py'
+model = dict(
+    decode_head=dict(
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0,
+            # DeepLab used this class weight for cityscapes
+            class_weight=[0.8373, 0.9180, 0.8660, 1.0345, 1.0166, 0.9969, 0.9754,
+                          1.0489, 0.8786, 1.0023, 0.9539, 0.9843, 1.1116, 0.9037,
+                          1.0865, 1.0955, 1.0865, 1.1529, 1.0507])))
+```
+
+`class_weight` will be passed into `CrossEntropyLoss` as the `weight` argument.
Please refer to [PyTorch Doc](https://pytorch.org/docs/stable/nn.html?highlight=crossentropy#torch.nn.CrossEntropyLoss) for details. + +## Multiple Losses + +For loss calculation, we support multiple losses training concurrently. Here is an example config of training `unet` on `DRIVE` dataset, whose loss function is `1:3` weighted sum of `CrossEntropyLoss` and `DiceLoss`: + +```python +_base_ = './fcn_unet_s5-d16_64x64_40k_drive.py' +model = dict( + decode_head=dict(loss_decode=[dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0)]), + auxiliary_head=dict(loss_decode=[dict(type='CrossEntropyLoss', loss_name='loss_ce',loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0)]), + ) +``` + +In this way, `loss_weight` and `loss_name` will be weight and name in training log of corresponding loss, respectively. + +Note: If you want this loss item to be included into the backward graph, `loss_` must be the prefix of the name. + +## Ignore specified label index in loss calculation + +In default setting, `avg_non_ignore=False` which means each pixel counts for loss calculation although some of them belong to ignore-index labels. + +For loss calculation, we support ignore index of certain label by `avg_non_ignore` and `ignore_index`. In this way, the average loss would only be calculated in non-ignored labels which may achieve better performance, and here is the [reference](https://github.com/open-mmlab/mmsegmentation/pull/1409). Here is an example config of training `unet` on `Cityscapes` dataset: in loss calculation it would ignore label 0 which is background and loss average is only calculated on non-ignore labels: + +```python +_base_ = './fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py' +model = dict( + decode_head=dict( + ignore_index=0, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0, avg_non_ignore=True), + auxiliary_head=dict( + ignore_index=0, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0, avg_non_ignore=True)), + )) +``` diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/useful_tools.md b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/useful_tools.md new file mode 100644 index 0000000..6da2de5 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/en/useful_tools.md @@ -0,0 +1,426 @@ +## Useful tools + +Apart from training/testing scripts, We provide lots of useful tools under the +`tools/` directory. + +### Get the FLOPs and params (experimental) + +We provide a script adapted from [flops-counter.pytorch](https://github.com/sovrasov/flops-counter.pytorch) to compute the FLOPs and params of a given model. + +```shell +python tools/get_flops.py ${CONFIG_FILE} [--shape ${INPUT_SHAPE}] +``` + +You will get the result like this. + +```none +============================== +Input shape: (3, 2048, 1024) +Flops: 1429.68 GMac +Params: 48.98 M +============================== +``` + +:::{note} +This tool is still experimental and we do not guarantee that the number is correct. You may well use the result for simple comparisons, but double check it before you adopt it in technical reports or papers. +::: + +(1) FLOPs are related to the input shape while parameters are not. The default input shape is (1, 3, 1280, 800). +(2) Some operators are not counted into FLOPs like GN and custom operators. 
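+
+For example (using the PSPNet config referenced elsewhere in these docs), running the script twice with different `--shape` values should report different FLOPs but an identical parameter count:
+
+```shell
+python tools/get_flops.py configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py --shape 512 1024
+python tools/get_flops.py configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py --shape 1024 2048
+```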
+ +### Publish a model + +Before you upload a model to AWS, you may want to +(1) convert model weights to CPU tensors, (2) delete the optimizer states and +(3) compute the hash of the checkpoint file and append the hash id to the filename. + +```shell +python tools/publish_model.py ${INPUT_FILENAME} ${OUTPUT_FILENAME} +``` + +E.g., + +```shell +python tools/publish_model.py work_dirs/pspnet/latest.pth psp_r50_hszhao_200ep.pth +``` + +The final output filename will be `psp_r50_512x1024_40ki_cityscapes-{hash id}.pth`. + +### Convert to ONNX (experimental) + +We provide a script to convert model to [ONNX](https://github.com/onnx/onnx) format. The converted model could be visualized by tools like [Netron](https://github.com/lutzroeder/netron). Besides, we also support comparing the output results between PyTorch and ONNX model. + +```bash +python tools/pytorch2onnx.py \ + ${CONFIG_FILE} \ + --checkpoint ${CHECKPOINT_FILE} \ + --output-file ${ONNX_FILE} \ + --input-img ${INPUT_IMG} \ + --shape ${INPUT_SHAPE} \ + --rescale-shape ${RESCALE_SHAPE} \ + --show \ + --verify \ + --dynamic-export \ + --cfg-options \ + model.test_cfg.mode="whole" +``` + +Description of arguments: + +- `config` : The path of a model config file. +- `--checkpoint` : The path of a model checkpoint file. +- `--output-file`: The path of output ONNX model. If not specified, it will be set to `tmp.onnx`. +- `--input-img` : The path of an input image for conversion and visualize. +- `--shape`: The height and width of input tensor to the model. If not specified, it will be set to img_scale of test_pipeline. +- `--rescale-shape`: rescale shape of output, set this value to avoid OOM, only work on `slide` mode. +- `--show`: Determines whether to print the architecture of the exported model. If not specified, it will be set to `False`. +- `--verify`: Determines whether to verify the correctness of an exported model. If not specified, it will be set to `False`. +- `--dynamic-export`: Determines whether to export ONNX model with dynamic input and output shapes. If not specified, it will be set to `False`. +- `--cfg-options`:Update config options. + +:::{note} +This tool is still experimental. Some customized operators are not supported for now. +::: + +### Evaluate ONNX model + +We provide `tools/deploy_test.py` to evaluate ONNX model with different backend. + +#### Prerequisite + +- Install onnx and onnxruntime-gpu + + ```shell + pip install onnx onnxruntime-gpu + ``` + +- Install TensorRT following [how-to-build-tensorrt-plugins-in-mmcv](https://mmcv.readthedocs.io/en/latest/tensorrt_plugin.html#how-to-build-tensorrt-plugins-in-mmcv)(optional) + +#### Usage + +```bash +python tools/deploy_test.py \ + ${CONFIG_FILE} \ + ${MODEL_FILE} \ + ${BACKEND} \ + --out ${OUTPUT_FILE} \ + --eval ${EVALUATION_METRICS} \ + --show \ + --show-dir ${SHOW_DIRECTORY} \ + --cfg-options ${CFG_OPTIONS} \ + --eval-options ${EVALUATION_OPTIONS} \ + --opacity ${OPACITY} \ +``` + +Description of all arguments + +- `config`: The path of a model config file. +- `model`: The path of a converted model file. +- `backend`: Backend of the inference, options: `onnxruntime`, `tensorrt`. +- `--out`: The path of output result file in pickle format. +- `--format-only` : Format the output results without perform evaluation. It is useful when you want to format the result to a specific format and submit it to the test server. If not specified, it will be set to `False`. Note that this argument is **mutually exclusive** with `--eval`. 
+- `--eval`: Evaluation metrics, which depends on the dataset, e.g., "mIoU" for generic datasets, and "cityscapes" for Cityscapes. Note that this argument is **mutually exclusive** with `--format-only`. +- `--show`: Show results flag. +- `--show-dir`: Directory where painted images will be saved +- `--cfg-options`: Override some settings in the used config file, the key-value pair in `xxx=yyy` format will be merged into config file. +- `--eval-options`: Custom options for evaluation, the key-value pair in `xxx=yyy` format will be kwargs for `dataset.evaluate()` function +- `--opacity`: Opacity of painted segmentation map. In (0, 1\] range. + +#### Results and Models + +| Model | Config | Dataset | Metric | PyTorch | ONNXRuntime | TensorRT-fp32 | TensorRT-fp16 | +| :--------: | :---------------------------------------------: | :--------: | :----: | :-----: | :---------: | :-----------: | :-----------: | +| FCN | fcn_r50-d8_512x1024_40k_cityscapes.py | cityscapes | mIoU | 72.2 | 72.2 | 72.2 | 72.2 | +| PSPNet | pspnet_r50-d8_512x1024_40k_cityscapes.py | cityscapes | mIoU | 77.8 | 77.8 | 77.8 | 77.8 | +| deeplabv3 | deeplabv3_r50-d8_512x1024_40k_cityscapes.py | cityscapes | mIoU | 79.0 | 79.0 | 79.0 | 79.0 | +| deeplabv3+ | deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py | cityscapes | mIoU | 79.6 | 79.5 | 79.5 | 79.5 | +| PSPNet | pspnet_r50-d8_769x769_40k_cityscapes.py | cityscapes | mIoU | 78.2 | 78.1 | | | +| deeplabv3 | deeplabv3_r50-d8_769x769_40k_cityscapes.py | cityscapes | mIoU | 78.5 | 78.3 | | | +| deeplabv3+ | deeplabv3plus_r50-d8_769x769_40k_cityscapes.py | cityscapes | mIoU | 78.9 | 78.7 | | | + +:::{note} +TensorRT is only available on configs with `whole mode`. +::: + +### Convert to TorchScript (experimental) + +We also provide a script to convert model to [TorchScript](https://pytorch.org/docs/stable/jit.html) format. You can use the pytorch C++ API [LibTorch](https://pytorch.org/docs/stable/cpp_index.html) inference the trained model. The converted model could be visualized by tools like [Netron](https://github.com/lutzroeder/netron). Besides, we also support comparing the output results between PyTorch and TorchScript model. + +```shell +python tools/pytorch2torchscript.py \ + ${CONFIG_FILE} \ + --checkpoint ${CHECKPOINT_FILE} \ + --output-file ${ONNX_FILE} + --shape ${INPUT_SHAPE} + --verify \ + --show +``` + +Description of arguments: + +- `config` : The path of a pytorch model config file. +- `--checkpoint` : The path of a pytorch model checkpoint file. +- `--output-file`: The path of output TorchScript model. If not specified, it will be set to `tmp.pt`. +- `--input-img` : The path of an input image for conversion and visualize. +- `--shape`: The height and width of input tensor to the model. If not specified, it will be set to `512 512`. +- `--show`: Determines whether to print the traced graph of the exported model. If not specified, it will be set to `False`. +- `--verify`: Determines whether to verify the correctness of an exported model. If not specified, it will be set to `False`. + +:::{note} +It's only support PyTorch>=1.8.0 for now. +::: + +:::{note} +This tool is still experimental. Some customized operators are not supported for now. +::: + +Examples: + +- Convert the cityscapes PSPNet pytorch model. 
+
+Examples:
+
+- Convert the cityscapes PSPNet pytorch model.
+
+  ```shell
+  python tools/pytorch2torchscript.py configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \
+  --checkpoint checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth \
+  --output-file checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pt \
+  --shape 512 1024
+  ```
+
+### Convert to TensorRT (experimental)
+
+A script to convert an [ONNX](https://github.com/onnx/onnx) model to [TensorRT](https://developer.nvidia.com/tensorrt) format.
+
+Prerequisites
+
+- Install `mmcv-full` with ONNXRuntime custom ops and TensorRT plugins following [ONNXRuntime in mmcv](https://mmcv.readthedocs.io/en/latest/deployment/onnxruntime_op.html) and [TensorRT plugin in mmcv](https://github.com/open-mmlab/mmcv/blob/master/docs/en/deployment/tensorrt_plugin.md).
+- Use [pytorch2onnx](#convert-to-onnx-experimental) to convert the model from PyTorch to ONNX.
+
+Usage
+
+```bash
+python ${MMSEG_PATH}/tools/onnx2tensorrt.py \
+    ${CFG_PATH} \
+    ${ONNX_PATH} \
+    --trt-file ${OUTPUT_TRT_PATH} \
+    --min-shape ${MIN_SHAPE} \
+    --max-shape ${MAX_SHAPE} \
+    --input-img ${INPUT_IMG} \
+    --show \
+    --verify
+```
+
+Description of all arguments:
+
+- `config` : Config file of the model.
+- `model` : Path to the input ONNX model.
+- `--trt-file` : Path to the output TensorRT engine.
+- `--max-shape` : Maximum shape of the model input.
+- `--min-shape` : Minimum shape of the model input.
+- `--fp16` : Enable fp16 model conversion.
+- `--workspace-size` : Max workspace size in GiB.
+- `--input-img` : Image used for visualization.
+- `--show` : Enable result visualization.
+- `--dataset` : Palette provider, `CityscapesDataset` by default.
+- `--verify` : Verify the outputs of ONNXRuntime and TensorRT.
+- `--verbose` : Whether to print verbose logging messages while creating the TensorRT engine. Defaults to `False`.
+
+:::{note}
+Only tested in `whole` mode.
+:::
+
+## Miscellaneous
+
+### Print the entire config
+
+`tools/print_config.py` prints the whole config verbatim, expanding all its
+imports.
+
+```shell
+python tools/print_config.py \
+    ${CONFIG} \
+    --graph \
+    --cfg-options ${OPTIONS [OPTIONS...]}
+```
+
+Description of arguments:
+
+- `config` : The path of a PyTorch model config file.
+- `--graph` : Determines whether to print the model's graph.
+- `--cfg-options`: Custom options to override settings in the config file.
+
+### Plot training logs
+
+`tools/analyze_logs.py` plots loss/mIoU curves given a training log file. Run `pip install seaborn` first to install the dependency.
+
+```shell
+python tools/analyze_logs.py xxx.log.json [--keys ${KEYS}] [--legend ${LEGEND}] [--backend ${BACKEND}] [--style ${STYLE}] [--out ${OUT_FILE}]
+```
+
+Examples:
+
+- Plot the mIoU, mAcc, aAcc metrics.
+
+  ```shell
+  python tools/analyze_logs.py log.json --keys mIoU mAcc aAcc --legend mIoU mAcc aAcc
+  ```
+
+- Plot the loss metric.
+
+  ```shell
+  python tools/analyze_logs.py log.json --keys loss --legend loss
+  ```
+
+### Model conversion
+
+`tools/model_converters/` provides several scripts to convert pretrained models released by other repos to MMSegmentation style.
+
+#### ViT, Swin and MiT Transformer Models
+
+- ViT
+
+  `tools/model_converters/vit2mmseg.py` converts keys in timm pretrained ViT models to MMSegmentation style.
+
+  ```shell
+  python tools/model_converters/vit2mmseg.py ${SRC} ${DST}
+  ```
+
+- Swin
+
+  `tools/model_converters/swin2mmseg.py` converts keys in official pretrained Swin models to MMSegmentation style.
+
+  ```shell
+  python tools/model_converters/swin2mmseg.py ${SRC} ${DST}
+  ```
+
+- SegFormer
+
+  `tools/model_converters/mit2mmseg.py` converts keys in official pretrained MiT models to MMSegmentation style.
+
+  ```shell
+  python tools/model_converters/mit2mmseg.py ${SRC} ${DST}
+  ```
+
+## Model Serving
+
+In order to serve an `MMSegmentation` model with [`TorchServe`](https://pytorch.org/serve/), you can follow the steps below:
+
+### 1. Convert model from MMSegmentation to TorchServe
+
+```shell
+python tools/torchserve/mmseg2torchserve.py ${CONFIG_FILE} ${CHECKPOINT_FILE} \
+--output-folder ${MODEL_STORE} \
+--model-name ${MODEL_NAME}
+```
+
+:::{note}
+${MODEL_STORE} needs to be an absolute path to a folder.
+:::
+
+### 2. Build `mmseg-serve` docker image
+
+```shell
+docker build -t mmseg-serve:latest docker/serve/
+```
+
+### 3. Run `mmseg-serve`
+
+Check the official docs for [running TorchServe with docker](https://github.com/pytorch/serve/blob/master/docker/README.md#running-torchserve-in-a-production-docker-environment).
+
+To run on a GPU, you need to install [nvidia-docker](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). You can omit the `--gpus` argument in order to run on the CPU.
+
+Example:
+
+```shell
+docker run --rm \
+--cpus 8 \
+--gpus device=0 \
+-p8080:8080 -p8081:8081 -p8082:8082 \
+--mount type=bind,source=$MODEL_STORE,target=/home/model-server/model-store \
+mmseg-serve:latest
+```
+
+[Read the docs](https://github.com/pytorch/serve/blob/072f5d088cce9bb64b2a18af065886c9b01b317b/docs/rest_api.md) about the Inference (8080), Management (8081) and Metrics (8082) APIs.
+
+### 4. Test deployment
+
+```shell
+curl -O https://raw.githubusercontent.com/open-mmlab/mmsegmentation/master/resources/3dogs.jpg
+curl http://127.0.0.1:8080/predictions/${MODEL_NAME} -T 3dogs.jpg -o 3dogs_mask.png
+```
+
+The response will be a ".png" mask.
+
+You can visualize the output as follows:
+
+```python
+import matplotlib.pyplot as plt
+import mmcv
+plt.imshow(mmcv.imread("3dogs_mask.png", "grayscale"))
+plt.show()
+```
+
+You should see something similar to:
+
+![3dogs_mask](../../resources/3dogs_mask.png)
+
+You can also use `test_torchserve.py` to compare the results of TorchServe and PyTorch, and visualize them.
+
+```shell
+python tools/torchserve/test_torchserve.py ${IMAGE_FILE} ${CONFIG_FILE} ${CHECKPOINT_FILE} ${MODEL_NAME}
+[--inference-addr ${INFERENCE_ADDR}] [--result-image ${RESULT_IMAGE}] [--device ${DEVICE}]
+```
+
+Example:
+
+```shell
+python tools/torchserve/test_torchserve.py \
+demo/demo.png \
+configs/fcn/fcn_r50-d8_512x1024_40k_cityscapes.py \
+checkpoint/fcn_r50-d8_512x1024_40k_cityscapes_20200604_192608-efe53f0d.pth \
+fcn
+```
+
+## Confusion Matrix
+
+To generate and plot an `n x n` confusion matrix, where `n` is the number of classes, follow the steps below:
+
+### 1. Generate a prediction result in pkl format using `test.py`
+
+```shell
+python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--out ${PATH_TO_RESULT_FILE}]
+```
+
+Note that `--eval` should be left unset (`None`) so that the result file contains numpy-type prediction results. The usage for distributed testing is just the same.
+
+Example:
+
+```shell
+python tools/test.py \
+configs/fcn/fcn_r50-d8_512x1024_40k_cityscapes.py \
+checkpoint/fcn_r50-d8_512x1024_40k_cityscapes_20200604_192608-efe53f0d.pth \
+--out result/pred_result.pkl
+```
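+
+As a quick sanity check, a sketch along these lines (reusing the `result/pred_result.pkl` path from the example above) can confirm that the file holds one numpy prediction map per test image:
+
+```python
+import mmcv  # mmcv.load reads .pkl files directly
+
+# Load the predictions saved by `tools/test.py --out`.
+results = mmcv.load('result/pred_result.pkl')
+print(len(results))      # number of test images
+print(results[0].shape)  # (H, W) array of per-pixel class ids
+```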
+### 2. Use `confusion_matrix.py` to generate and plot a confusion matrix
+
+```shell
+python tools/confusion_matrix.py ${CONFIG_FILE} ${PATH_TO_RESULT_FILE} ${SAVE_DIR} --show
+```
+
+Description of arguments:
+
+- `config`: Path to the test config file.
+- `prediction_path`: Path to the prediction .pkl result.
+- `save_dir`: Directory where the confusion matrix will be saved.
+- `--show`: Enable result visualization.
+- `--color-theme`: Theme of the matrix color map.
+- `--cfg_options`: Custom options to override settings in the config file.
+
+Example:
+
+```shell
+python tools/confusion_matrix.py \
+configs/fcn/fcn_r50-d8_512x1024_40k_cityscapes.py \
+result/pred_result.pkl \
+result/confusion_matrix \
+--show
+```
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/Makefile b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/Makefile
new file mode 100644
index 0000000..d4bb2cb
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = .
+BUILDDIR      = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/_static/css/readthedocs.css b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/_static/css/readthedocs.css
new file mode 100644
index 0000000..2e38d08
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/_static/css/readthedocs.css
@@ -0,0 +1,6 @@
+.header-logo {
+    background-image: url("../images/mmsegmentation.png");
+    background-size: 201px 40px;
+    height: 40px;
+    width: 201px;
+}
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/_static/images/mmsegmentation.png b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/_static/images/mmsegmentation.png
new file mode 100644
index 0000000..009083a
Binary files /dev/null and b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/_static/images/mmsegmentation.png differ
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/api.rst b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/api.rst
new file mode 100644
index 0000000..8285841
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/api.rst
@@ -0,0 +1,58 @@
+mmseg.apis
+--------------
+.. automodule:: mmseg.apis
+    :members:
+
+mmseg.core
+--------------
+
+seg
+^^^^^^^^^^
+.. automodule:: mmseg.core.seg
+    :members:
+
+evaluation
+^^^^^^^^^^
+.. automodule:: mmseg.core.evaluation
+    :members:
+
+utils
+^^^^^^^^^^
+.. automodule:: mmseg.core.utils
+    :members:
+
+mmseg.datasets
+--------------
+
+datasets
+^^^^^^^^^^
+.. automodule:: mmseg.datasets
+    :members:
+
+pipelines
+^^^^^^^^^^
+.. automodule:: mmseg.datasets.pipelines
+    :members:
+
+mmseg.models
+--------------
+
+segmentors
+^^^^^^^^^^
+.. automodule:: mmseg.models.segmentors
+    :members:
+
+backbones
+^^^^^^^^^^
+.. automodule:: mmseg.models.backbones
+    :members:
+
+decode_heads
+^^^^^^^^^^^^
+.. 
automodule:: mmseg.models.decode_heads + :members: + +losses +^^^^^^^^^^ +.. automodule:: mmseg.models.losses + :members: diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/conf.py b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/conf.py new file mode 100644 index 0000000..4dec48d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/conf.py @@ -0,0 +1,134 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +import os +import subprocess +import sys + +import pytorch_sphinx_theme + +sys.path.insert(0, os.path.abspath('../../')) + +# -- Project information ----------------------------------------------------- + +project = 'MMSegmentation' +copyright = '2020-2021, OpenMMLab' +author = 'MMSegmentation Authors' +version_file = '../../mmseg/version.py' + + +def get_version(): + with open(version_file, 'r') as f: + exec(compile(f.read(), version_file, 'exec')) + return locals()['__version__'] + + +# The full version, including alpha/beta/rc tags +release = get_version() + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', 'sphinx.ext.napoleon', 'sphinx.ext.viewcode', + 'sphinx_markdown_tables', 'sphinx_copybutton', 'myst_parser' +] + +autodoc_mock_imports = [ + 'matplotlib', 'pycocotools', 'mmseg.version', 'mmcv.ops' +] + +# Ignore >>> when copying code +copybutton_prompt_text = r'>>> |\.\.\. ' +copybutton_prompt_is_regexp = True + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# +source_suffix = { + '.rst': 'restructuredtext', + '.md': 'markdown', +} + +# The master toctree document. +master_doc = 'index' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. 
+# +# html_theme = 'sphinx_rtd_theme' +html_theme = 'pytorch_sphinx_theme' +html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()] +html_theme_options = { + 'logo_url': + 'https://mmsegmentation.readthedocs.io/zh-CN/latest/', + 'menu': [ + { + 'name': + '教程', + 'url': + 'https://github.com/open-mmlab/mmsegmentation/blob/master/' + 'demo/MMSegmentation_Tutorial.ipynb' + }, + { + 'name': 'GitHub', + 'url': 'https://github.com/open-mmlab/mmsegmentation' + }, + { + 'name': + '上游库', + 'children': [ + { + 'name': 'MMCV', + 'url': 'https://github.com/open-mmlab/mmcv', + 'description': '基础视觉库' + }, + ] + }, + ], + # Specify the language of shared menu + 'menu_lang': + 'cn', +} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] +html_css_files = ['css/readthedocs.css'] + +# Enable ::: for my_st +myst_enable_extensions = ['colon_fence'] +myst_heading_anchors = 3 + +language = 'zh-CN' + + +def builder_inited_handler(app): + subprocess.run(['./stat.py']) + + +def setup(app): + app.connect('builder-inited', builder_inited_handler) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/dataset_prepare.md b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/dataset_prepare.md new file mode 100644 index 0000000..6b9c821 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/dataset_prepare.md @@ -0,0 +1,319 @@ +## 准备数据集 + +推荐用软链接,将数据集根目录链接到 `$MMSEGMENTATION/data` 里。如果您的文件夹结构是不同的,您也许可以试着修改配置文件里对应的路径。 + +```none +mmsegmentation +├── mmseg +├── tools +├── configs +├── data +│ ├── cityscapes +│ │ ├── leftImg8bit +│ │ │ ├── train +│ │ │ ├── val +│ │ ├── gtFine +│ │ │ ├── train +│ │ │ ├── val +│ ├── VOCdevkit +│ │ ├── VOC2012 +│ │ │ ├── JPEGImages +│ │ │ ├── SegmentationClass +│ │ │ ├── ImageSets +│ │ │ │ ├── Segmentation +│ │ ├── VOC2010 +│ │ │ ├── JPEGImages +│ │ │ ├── SegmentationClassContext +│ │ │ ├── ImageSets +│ │ │ │ ├── SegmentationContext +│ │ │ │ │ ├── train.txt +│ │ │ │ │ ├── val.txt +│ │ │ ├── trainval_merged.json +│ │ ├── VOCaug +│ │ │ ├── dataset +│ │ │ │ ├── cls +│ ├── ade +│ │ ├── ADEChallengeData2016 +│ │ │ ├── annotations +│ │ │ │ ├── training +│ │ │ │ ├── validation +│ │ │ ├── images +│ │ │ │ ├── training +│ │ │ │ ├── validation +│ ├── CHASE_DB1 +│ │ ├── images +│ │ │ ├── training +│ │ │ ├── validation +│ │ ├── annotations +│ │ │ ├── training +│ │ │ ├── validation +│ ├── DRIVE +│ │ ├── images +│ │ │ ├── training +│ │ │ ├── validation +│ │ ├── annotations +│ │ │ ├── training +│ │ │ ├── validation +│ ├── HRF +│ │ ├── images +│ │ │ ├── training +│ │ │ ├── validation +│ │ ├── annotations +│ │ │ ├── training +│ │ │ ├── validation +│ ├── STARE +│ │ ├── images +│ │ │ ├── training +│ │ │ ├── validation +│ │ ├── annotations +│ │ │ ├── training +│ │ │ ├── validation +| ├── dark_zurich +| │   ├── gps +| │   │   ├── val +| │   │   └── val_ref +| │   ├── gt +| │   │   └── val +| │   ├── LICENSE.txt +| │   ├── lists_file_names +| │   │   ├── val_filenames.txt +| │   │   └── val_ref_filenames.txt +| │   ├── README.md +| │   └── rgb_anon +| │   | ├── val +| │   | └── val_ref +| ├── NighttimeDrivingTest +| | ├── gtCoarse_daytime_trainvaltest +| | │   └── test +| | │   └── night +| | └── leftImg8bit +| | | └── test +| | | └── night +│ ├── loveDA +│ │ ├── img_dir +│ │ │ ├── train +│ │ │ ├── val +│ │ │ ├── test +│ │ ├── ann_dir +│ │ │ ├── train 
+│ │ │ ├── val +│ ├── potsdam +│ │ ├── img_dir +│ │ │ ├── train +│ │ │ ├── val +│ │ ├── ann_dir +│ │ │ ├── train +│ │ │ ├── val +│ ├── vaihingen +│ │ ├── img_dir +│ │ │ ├── train +│ │ │ ├── val +│ │ ├── ann_dir +│ │ │ ├── train +│ │ │ ├── val +│ ├── iSAID +│ │ ├── img_dir +│ │ │ ├── train +│ │ │ ├── val +│ │ │ ├── test +│ │ ├── ann_dir +│ │ │ ├── train +│ │ │ ├── val +``` + +### Cityscapes + +注册成功后,数据集可以在 [这里](https://www.cityscapes-dataset.com/downloads/) 下载。 + +通常情况下,`**labelTrainIds.png` 被用来训练 cityscapes。 +基于 [cityscapesscripts](https://github.com/mcordts/cityscapesScripts), +我们提供了一个 [脚本](https://github.com/open-mmlab/mmsegmentation/blob/master/tools/convert_datasets/cityscapes.py), +去生成 `**labelTrainIds.png`。 + +```shell +# --nproc 8 意味着有 8 个进程用来转换,它也可以被忽略。 +python tools/convert_datasets/cityscapes.py data/cityscapes --nproc 8 +``` + +### Pascal VOC + +Pascal VOC 2012 可以在 [这里](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar) 下载。 +此外,许多最近在 Pascal VOC 数据集上的工作都会利用增广的数据,它们可以在 [这里](http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz) 找到。 + +如果您想使用增广后的 VOC 数据集,请运行下面的命令来将数据增广的标注转成正确的格式。 + +```shell +# --nproc 8 意味着有 8 个进程用来转换,它也可以被忽略。 +python tools/convert_datasets/voc_aug.py data/VOCdevkit data/VOCdevkit/VOCaug --nproc 8 +``` + +关于如何拼接数据集 (concatenate) 并一起训练它们,更多细节请参考 [拼接连接数据集](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/zh_cn/tutorials/customize_datasets.md#%E6%8B%BC%E6%8E%A5%E6%95%B0%E6%8D%AE%E9%9B%86) 。 + +### ADE20K + +ADE20K 的训练集和验证集可以在 [这里](http://data.csail.mit.edu/places/ADEchallenge/ADEChallengeData2016.zip) 下载。 +您还可以在 [这里](http://data.csail.mit.edu/places/ADEchallenge/release_test.zip) 下载验证集。 + +### Pascal Context + +Pascal Context 的训练集和验证集可以在 [这里](http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar) 下载。 +注册成功后,您还可以在 [这里](http://host.robots.ox.ac.uk:8080/eval/downloads/VOC2010test.tar) 下载验证集。 + +为了从原始数据集里切分训练集和验证集, 您可以在 [这里](https://codalabuser.blob.core.windows.net/public/trainval_merged.json) +下载 trainval_merged.json。 + +如果您想使用 Pascal Context 数据集, +请安装 [细节](https://github.com/zhanghang1989/detail-api) 然后再运行如下命令来把标注转换成正确的格式。 + +```shell +python tools/convert_datasets/pascal_context.py data/VOCdevkit data/VOCdevkit/VOC2010/trainval_merged.json +``` + +### CHASE DB1 + +CHASE DB1 的训练集和验证集可以在 [这里](https://staffnet.kingston.ac.uk/~ku15565/CHASE_DB1/assets/CHASEDB1.zip) 下载。 + +为了将 CHASE DB1 数据集转换成 MMSegmentation 的格式,您需要运行如下命令: + +```shell +python tools/convert_datasets/chase_db1.py /path/to/CHASEDB1.zip +``` + +这个脚本将自动生成正确的文件夹结构。 + +### DRIVE + +DRIVE 的训练集和验证集可以在 [这里](https://drive.grand-challenge.org/) 下载。 +在此之前,您需要注册一个账号,当前 '1st_manual' 并未被官方提供,因此需要您从其他地方获取。 + +为了将 DRIVE 数据集转换成 MMSegmentation 格式,您需要运行如下命令: + +```shell +python tools/convert_datasets/drive.py /path/to/training.zip /path/to/test.zip +``` + +这个脚本将自动生成正确的文件夹结构。 + +### HRF + +首先,下载 [healthy.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/healthy.zip) [glaucoma.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/glaucoma.zip), [diabetic_retinopathy.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/diabetic_retinopathy.zip), [healthy_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/healthy_manualsegm.zip), [glaucoma_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/glaucoma_manualsegm.zip) 以及 
[diabetic_retinopathy_manualsegm.zip](https://www5.cs.fau.de/fileadmin/research/datasets/fundus-images/diabetic_retinopathy_manualsegm.zip) 。 + +为了将 HRF 数据集转换成 MMSegmentation 格式,您需要运行如下命令: + +```shell +python tools/convert_datasets/hrf.py /path/to/healthy.zip /path/to/healthy_manualsegm.zip /path/to/glaucoma.zip /path/to/glaucoma_manualsegm.zip /path/to/diabetic_retinopathy.zip /path/to/diabetic_retinopathy_manualsegm.zip +``` + +这个脚本将自动生成正确的文件夹结构。 + +### STARE + +首先,下载 [stare-images.tar](http://cecas.clemson.edu/~ahoover/stare/probing/stare-images.tar), [labels-ah.tar](http://cecas.clemson.edu/~ahoover/stare/probing/labels-ah.tar) 和 [labels-vk.tar](http://cecas.clemson.edu/~ahoover/stare/probing/labels-vk.tar) 。 + +为了将 STARE 数据集转换成 MMSegmentation 格式,您需要运行如下命令: + +```shell +python tools/convert_datasets/stare.py /path/to/stare-images.tar /path/to/labels-ah.tar /path/to/labels-vk.tar +``` + +这个脚本将自动生成正确的文件夹结构。 + +### Dark Zurich + +因为我们只支持在此数据集上测试模型,所以您只需下载[验证集](https://data.vision.ee.ethz.ch/csakarid/shared/GCMA_UIoU/Dark_Zurich_val_anon.zip) 。 + +### Nighttime Driving + +因为我们只支持在此数据集上测试模型,所以您只需下载[测试集](http://data.vision.ee.ethz.ch/daid/NighttimeDriving/NighttimeDrivingTest.zip) 。 + +### LoveDA + +可以从 Google Drive 里下载 [LoveDA数据集](https://drive.google.com/drive/folders/1ibYV0qwn4yuuh068Rnc-w4tPi0U0c-ti?usp=sharing) 。 + +或者它还可以从 [zenodo](https://zenodo.org/record/5706578#.YZvN7SYRXdF) 下载, 您需要运行如下命令: + +```shell +# Download Train.zip +wget https://zenodo.org/record/5706578/files/Train.zip +# Download Val.zip +wget https://zenodo.org/record/5706578/files/Val.zip +# Download Test.zip +wget https://zenodo.org/record/5706578/files/Test.zip +``` + +对于 LoveDA 数据集,请运行以下命令下载并重新组织数据集 + +```shell +python tools/convert_datasets/loveda.py /path/to/loveDA +``` + +请参照 [这里](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/zh_cn/inference.md) 来使用训练好的模型去预测 LoveDA 测试集并且提交到官网。 + +关于 LoveDA 的更多细节可以在[这里](https://github.com/Junjue-Wang/LoveDA) 找到。 + +### ISPRS Potsdam + +[Potsdam](https://www2.isprs.org/commissions/comm2/wg4/benchmark/2d-sem-label-potsdam/) +数据集是一个有着2D 语义分割内容标注的城市遥感数据集。 +数据集可以从挑战[主页](https://www2.isprs.org/commissions/comm2/wg4/benchmark/data-request-form/) 获得。 +需要其中的 '2_Ortho_RGB.zip' 和 '5_Labels_all_noBoundary.zip'。 + +对于 Potsdam 数据集,请运行以下命令下载并重新组织数据集 + +```shell +python tools/convert_datasets/potsdam.py /path/to/potsdam +``` + +使用我们默认的配置, 将生成 3456 张图片的训练集和 2016 张图片的验证集。 + +### ISPRS Vaihingen + +[Vaihingen](https://www2.isprs.org/commissions/comm2/wg4/benchmark/2d-sem-label-vaihingen/) +数据集是一个有着2D 语义分割内容标注的城市遥感数据集。 + +数据集可以从挑战 [主页](https://www2.isprs.org/commissions/comm2/wg4/benchmark/data-request-form/). +需要其中的 'ISPRS_semantic_labeling_Vaihingen.zip' 和 'ISPRS_semantic_labeling_Vaihingen_ground_truth_eroded_COMPLETE.zip'。 + +对于 Vaihingen 数据集,请运行以下命令下载并重新组织数据集 + +```shell +python tools/convert_datasets/vaihingen.py /path/to/vaihingen +``` + +使用我们默认的配置 (`clip_size`=512, `stride_size`=256), 将生成 344 张图片的训练集和 398 张图片的验证集。 + +### iSAID + +iSAID 数据集(训练集/验证集/测试集)的图像可以从 [DOTA-v1.0](https://captain-whu.github.io/DOTA/dataset.html) 下载. + +iSAID 数据集(训练集/验证集)的注释可以从 [iSAID](https://captain-whu.github.io/iSAID/dataset.html) 下载. + +该数据集是一个大规模的实例分割(也可以用于语义分割)的遥感数据集. + +下载后,在数据集转换前,您需要将数据集文件夹调整成如下格式. 
+ +``` +│ ├── iSAID +│ │ ├── train +│ │ │ ├── images +│ │ │ │ ├── part1.zip +│ │ │ │ ├── part2.zip +│ │ │ │ ├── part3.zip +│ │ │ ├── Semantic_masks +│ │ │ │ ├── images.zip +│ │ ├── val +│ │ │ ├── images +│ │ │ │ ├── part1.zip +│ │ │ ├── Semantic_masks +│ │ │ │ ├── images.zip +│ │ ├── test +│ │ │ ├── images +│ │ │ │ ├── part1.zip +│ │ │ │ ├── part2.zip +``` + +```shell +python tools/convert_datasets/isaid.py /path/to/iSAID +``` + +使用我们默认的配置 (`patch_width`=896, `patch_height`=896, `overlap_area`=384), 将生成 33978 张图片的训练集和 11644 张图片的验证集。 diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/faq.md b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/faq.md new file mode 100644 index 0000000..d2038f1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/faq.md @@ -0,0 +1,64 @@ +# 常见问题解答(FAQ) + +我们在这里列出了使用时的一些常见问题及其相应的解决方案。 如果您发现有一些问题被遗漏,请随时提 PR 丰富这个列表。 如果您无法在此获得帮助,请使用 [issue模板](https://github.com/open-mmlab/mmsegmentation/blob/master/.github/ISSUE_TEMPLATE/error-report.md/)创建问题,但是请在模板中填写所有必填信息,这有助于我们更快定位问题。 + +## 安装 + +兼容的MMSegmentation和MMCV版本如下。请安装正确版本的MMCV以避免安装问题。 + +| MMSegmentation version | MMCV version | MMClassification version | +| :--------------------: | :-------------------------: | :----------------------: | +| master | mmcv-full>=1.5.0, \<=1.6.0 | mmcls>=0.20.1, \<=1.0.0 | +| 0.25.0 | mmcv-full>=1.5.0, \<=1.6.0 | mmcls>=0.20.1, \<=1.0.0 | +| 0.24.1 | mmcv-full>=1.4.4, \<=1.6.0 | mmcls>=0.20.1, \<=1.0.0 | +| 0.23.0 | mmcv-full>=1.4.4, \<=1.6.0 | mmcls>=0.20.1, \<=1.0.0 | +| 0.22.0 | mmcv-full>=1.4.4, \<=1.6.0 | mmcls>=0.20.1, \<=1.0.0 | +| 0.21.1 | mmcv-full>=1.4.4, \<=1.6.0 | Not required | +| 0.20.2 | mmcv-full>=1.3.13, \<=1.6.0 | Not required | +| 0.19.0 | mmcv-full>=1.3.13, \<1.3.17 | Not required | +| 0.18.0 | mmcv-full>=1.3.13, \<1.3.17 | Not required | +| 0.17.0 | mmcv-full>=1.3.7, \<1.3.17 | Not required | +| 0.16.0 | mmcv-full>=1.3.7, \<1.3.17 | Not required | +| 0.15.0 | mmcv-full>=1.3.7, \<1.3.17 | Not required | +| 0.14.1 | mmcv-full>=1.3.7, \<1.3.17 | Not required | +| 0.14.0 | mmcv-full>=1.3.1, \<1.3.2 | Not required | +| 0.13.0 | mmcv-full>=1.3.1, \<1.3.2 | Not required | +| 0.12.0 | mmcv-full>=1.1.4, \<1.3.2 | Not required | +| 0.11.0 | mmcv-full>=1.1.4, \<1.3.0 | Not required | +| 0.10.0 | mmcv-full>=1.1.4, \<1.3.0 | Not required | +| 0.9.0 | mmcv-full>=1.1.4, \<1.3.0 | Not required | +| 0.8.0 | mmcv-full>=1.1.4, \<1.2.0 | Not required | +| 0.7.0 | mmcv-full>=1.1.2, \<1.2.0 | Not required | +| 0.6.0 | mmcv-full>=1.1.2, \<1.2.0 | Not required | + +如果你安装了mmcv,你需要先运行`pip uninstall mmcv`。 +如果mmcv和mmcv-full都安装了,会出现 "ModuleNotFoundError"。 + +- "No module named 'mmcv.ops'"; "No module named 'mmcv.\_ext'". + 1. 使用`pip uninstall mmcv`卸载环境中现有的mmcv。 + 2. 
按照[安装说明](get_started#best-practices)安装mmcv-full。 + +## 如何获知模型训练时需要的显卡数量 + +- 看模型的config文件的命名。可以参考[学习配置文件](https://github.com/open-mmlab/mmsegmentation/blob/master/docs/zh_cn/tutorials/config.md)中的`配置文件命名风格`部分。比如,对于名字为`segformer_mit-b0_8x1_1024x1024_160k_cityscapes.py`的config文件,`8x1`代表训练其对应的模型需要的卡数为8,每张卡中的batch size为1。 +- 看模型的log文件。点开该模型的log文件,并在其中搜索`nGPU`,在`nGPU`后的数字个数即训练时所需的卡数。比如,在log文件中搜索`nGPU`得到`nGPU 0,1,2,3,4,5,6,7`的记录,则说明训练该模型需要使用八张卡。 + +## auxiliary head 是什么 + +简单来说,这是一个提高准确率的深度监督技术。在训练阶段,`decode_head` 用于输出语义分割的结果,`auxiliary_head` 只是增加了一个辅助损失,其产生的分割结果对你的模型结果没有影响,仅在在训练中起作用。你可以阅读这篇[论文](https://arxiv.org/pdf/1612.01105.pdf)了解更多信息。 + +## 为什么日志文件没有被创建 + +在训练脚本中,我们在第167行调用 `get_root_logger` 方法,然后 mmseg 的 `get_root_logger` 方法调用 mmcv 的 `get_logger`,mmcv 将返回在 'mmsegmentation/tools/train.py' 中使用参数 `log_file` 初始化的同一个 logger。在训练期间只存在一个用 `log_file` 初始化的 logger。 + +参考:[https://github.com/open-mmlab/mmcv/blob/21bada32560c7ed7b15b017dc763d862789e29a8/mmcv/utils/logging.py#L9-L16](https://github.com/open-mmlab/mmcv/blob/21bada32560c7ed7b15b017dc763d862789e29a8/mmcv/utils/logging.py#L9-L16) + +如果你发现日志文件没有被创建,可以检查 `mmcv.utils.get_logger` 是否在其他地方被调用。 + +## 运行测试脚本时如何输出绘制分割掩膜的图像 + +在测试脚本中,我们提供了`show-dir`参数来控制是否输出绘制的图像。用户可以运行以下命令: + +```shell +python tools/test.py {config} {checkpoint} --show-dir {/path/to/save/image} --opacity 1 +``` diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/get_started.md b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/get_started.md new file mode 100644 index 0000000..b825646 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/get_started.md @@ -0,0 +1,209 @@ +# 依赖 + +在本节中,我们将演示如何用PyTorch准备一个环境。 + +MMSegmentation 可以在 Linux、Windows 和 MacOS 上运行。它需要 Python 3.6 以上,CUDA 9.2 以上和 PyTorch 1.3 以上。 + +```{note} +如果您对PyTorch有经验并且已经安装了它,请跳到下一节。否则,您可以按照以下步骤进行准备。 +``` + +**第一步** 从[官方网站](https://docs.conda.io/en/latest/miniconda.html)下载并安装 Miniconda。 + +**第二步** 创建并激活一个 conda 环境。 + +```shell +conda create --name openmmlab python=3.8 -y +conda activate openmmlab +``` + +**第三步** 按照[官方说明](https://pytorch.org/get-started/locally/)安装 PyTorch。 + +在 GPU 平台上: + +```shell +conda install pytorch torchvision -c pytorch +``` + +在 CPU 平台上: + +```shell +conda install pytorch torchvision cpuonly -c pytorch +``` + +# 安装 + +我们建议用户遵循我们的最佳实践来安装MMSegmentation,同时整个过程是高度可定制的。更多信息见[自定义安装](#customize-installation)部分。 + +## 最佳实践 + +**第一步** 使用 [MIM](https://github.com/open-mmlab/mim) 安装 [MMCV](https://github.com/open-mmlab/mmcv) + +```shell +pip install -U openmim +mim install mmcv-full +``` + +**第二步** 安装 MMSegmentation + +根据具体需求,我们支持两种安装模式: + +- [从源码安装(推荐)](#%E4%BB%8E%E6%BA%90%E7%A0%81%E5%AE%89%E8%A3%85):如果基于 MMSegmentation 框架开发自己的任务,需要添加新的功能,比如新的模型或是数据集,或者使用我们提供的各种工具。 +- [作为 Python 包安装](#%E4%BD%9C%E4%B8%BA-python-%E5%8C%85%E5%AE%89%E8%A3%85):只是希望调用 MMSegmentation 的接口,或者在自己的项目中导入 MMSegmentation 中的模块。 + +### 从源码安装 + +```shell +git clone https://github.com/open-mmlab/mmsegmentation.git +cd mmsegmentation +pip install -v -e . +# "-v "指详细说明,或更多的输出 +# "-e" 表示在可编辑模式下安装项目,因此对代码所做的任何本地修改都会生效,从而无需重新安# 装。 +``` + +### 作为 Python 包安装 + +```shell +pip install mmsegmentation +``` + +## 验证安装 + +为了验证 MMSegmentation 是否安装正确,我们提供了一些示例代码来执行模型推理。 + +**第一步** 我们需要下载配置文件和模型权重文件。 + +```shell +mim download mmsegmentation --config pspnet_r50-d8_512x1024_40k_cityscapes --dest . 
+```
+
+下载将需要几秒钟或更长时间,这取决于你的网络环境。完成后,你会在当前文件夹中发现两个文件`pspnet_r50-d8_512x1024_40k_cityscapes.py`和`pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth`。
+
+**第二步** 验证推理示例
+
+如果您是**从源码安装**的 MMSegmentation,那么直接运行以下命令进行验证:
+
+```shell
+python demo/image_demo.py demo/demo.png pspnet_r50-d8_512x1024_40k_cityscapes.py pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth --device cpu --out-file result.jpg
+```
+
+你会在你的当前文件夹中看到一个新的图像`result.jpg`,其中的分割掩膜覆盖在所有对象上。
+
+如果您是**作为 Python 包安装**,那么可以打开您的 Python 解释器,复制并粘贴如下代码:
+
+```python
+from mmseg.apis import inference_segmentor, init_segmentor
+import mmcv
+
+config_file = 'pspnet_r50-d8_512x1024_40k_cityscapes.py'
+checkpoint_file = 'pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth'
+
+# 通过配置文件和模型权重文件构建模型
+model = init_segmentor(config_file, checkpoint_file, device='cuda:0')
+
+# 对单张图片进行推理并展示结果
+img = 'test.jpg'  # or img = mmcv.imread(img), which will only load it once
+result = inference_segmentor(model, img)
+# 在新窗口中可视化推理结果
+model.show_result(img, result, show=True)
+# 或将可视化结果存储在文件中
+# 你可以修改 opacity 在(0,1]之间的取值来改变绘制好的分割图的透明度
+model.show_result(img, result, out_file='result.jpg', opacity=0.5)
+
+# 对视频进行推理并展示结果
+video = mmcv.VideoReader('video.mp4')
+for frame in video:
+    result = inference_segmentor(model, frame)
+    model.show_result(frame, result, wait_time=1)
+```
+
+你可以修改上面的代码来测试一张图片或一段视频,这两种方式都可以验证安装是否成功。
+
+## 自定义安装
+
+### CUDA 版本
+
+在安装 PyTorch 时,您需要指定 CUDA 的版本。如果您不清楚应该选择哪一个,请遵循我们的建议。
+
+- 对于 Ampere 架构的 NVIDIA GPU,例如 GeForce 30 系列以及 NVIDIA A100,CUDA 11 是必需的。
+- 对于更早的 NVIDIA GPU,CUDA 11 是向后兼容 (backward compatible) 的,但 CUDA 10.2 能够提供更好的兼容性,也更加轻量。
+
+请确保您的 GPU 驱动版本满足最低的版本需求,参阅[这张表](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html#cuda-major-component-versions__table-cuda-toolkit-driver-versions)。
+
+```{note}
+如果按照我们的最佳实践进行安装,CUDA 运行时库就足够了,因为我们提供相关 CUDA 代码的预编译,您不需要进行本地编译。
+但如果您希望从源码进行 MMCV 的编译,或是进行其他 CUDA 算子的开发,那么就必须安装完整的 CUDA 工具链,参见
+[NVIDIA 官网](https://developer.nvidia.com/cuda-downloads),另外还需要确保该 CUDA 工具链的版本与 PyTorch 安装时
+的配置相匹配(如用 `conda install` 安装 PyTorch 时指定的 cudatoolkit 版本)。
+```
+
+### 不使用 MIM 安装 MMCV
+
+MMCV 包含 C++ 和 CUDA 扩展,因此其对 PyTorch 的依赖比较复杂。MIM 会自动解析这些
+依赖,选择合适的 MMCV 预编译包,使安装更简单,但它并不是必需的。
+
+要使用 pip 而不是 MIM 来安装 MMCV,请遵照 [MMCV 安装指南](https://mmcv.readthedocs.io/zh_CN/latest/get_started/installation.html)。
+它需要您用指定 url 的形式手动指定对应的 PyTorch 和 CUDA 版本。
+
+举个例子,如下命令将会安装基于 PyTorch 1.10.x 和 CUDA 11.3 编译的 mmcv-full。
+
+```shell
+pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu113/torch1.10/index.html
+```
+
+### 在 CPU 环境中安装
+
+MMSegmentation 可以仅在 CPU 环境中安装,在 CPU 模式下,您可以完成训练(需要 MMCV 版本 >= 1.4.4)、测试和模型推理等所有操作。
+
+### 在 Google Colab 中安装
+
+[Google Colab](https://colab.research.google.com/) 通常已经包含了 PyTorch 环境,因此我们只需要安装 MMCV 和 MMSegmentation 即可,命令如下:
+
+**第一步** 使用 [MIM](https://github.com/open-mmlab/mim) 安装 [MMCV](https://github.com/open-mmlab/mmcv)
+
+```shell
+!pip3 install openmim
+!mim install mmcv-full
+```
+
+**第二步** 从源码安装 MMSegmentation
+
+```shell
+!git clone https://github.com/open-mmlab/mmsegmentation.git
+%cd mmsegmentation
+!pip install -e .
+```
+
+**第三步** 验证
+
+```python
+import mmseg
+print(mmseg.__version__)
+# 预期输出:0.24.1 或其他版本号
+```
+
+```{note}
+在 Jupyter 中,感叹号 `!` 用于执行外部命令,而 `%cd` 是一个[魔术命令](https://ipython.readthedocs.io/en/stable/interactive/magics.html#magic-cd),用于切换 Python 的工作路径。
+```
+
+### 通过 Docker 使用 MMSegmentation
+
+我们提供了一个 [Dockerfile](https://github.com/open-mmlab/mmsegmentation/blob/master/docker/Dockerfile) 来构建一个镜像。请确保你的 [docker 版本](https://docs.docker.com/engine/install/) >=19.03。
+
+```shell
+# build an image with PyTorch 1.11, CUDA 11.3
+# If you prefer other versions, just modify the Dockerfile
+docker build -t mmsegmentation docker/
+```
+
+用以下命令运行 Docker 镜像:
+
+```shell
+docker run --gpus all --shm-size=8g -it -v {DATA_DIR}:/mmsegmentation/data mmsegmentation
+```
+
+## 故障解决
+
+如果你在安装过程中遇到一些问题,请先查看 [FAQ](faq.md) 页面。
+
+如果没有找到解决方案,你也可以在 GitHub 上[打开一个问题](https://github.com/open-mmlab/mmsegmentation/issues/new/choose)。
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/imgs/qq_group_qrcode.jpg b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/imgs/qq_group_qrcode.jpg
new file mode 100644
index 0000000..09ec51f
Binary files /dev/null and b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/imgs/qq_group_qrcode.jpg differ
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/imgs/zhihu_qrcode.jpg b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/imgs/zhihu_qrcode.jpg
new file mode 100644
index 0000000..c745fb0
Binary files /dev/null and b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/imgs/zhihu_qrcode.jpg differ
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/index.rst b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/index.rst
new file mode 100644
index 0000000..4ac9211
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/index.rst
@@ -0,0 +1,63 @@
+欢迎来到 MMSegmentation 的文档!
+=======================================
+
+.. toctree::
+   :maxdepth: 2
+   :caption: 开始你的第一步
+
+   get_started.md
+
+.. toctree::
+   :maxdepth: 1
+   :caption: 数据集准备
+
+   dataset_prepare.md
+
+.. toctree::
+   :maxdepth: 1
+   :caption: 模型库
+
+   model_zoo.md
+   modelzoo_statistics.md
+
+.. toctree::
+   :maxdepth: 2
+   :caption: 快速启动
+
+   train.md
+   inference.md
+
+.. toctree::
+   :maxdepth: 2
+   :caption: 教程
+
+   tutorials/index.rst
+
+.. toctree::
+   :maxdepth: 2
+   :caption: 实用工具与脚本
+
+   useful_tools.md
+
+.. toctree::
+   :maxdepth: 2
+   :caption: 说明
+
+   changelog.md
+   faq.md
+
+.. toctree::
+   :caption: 语言切换
+
+   switch_language.md
+
+.. 
toctree:: + :caption: 接口文档(英文) + + api.rst + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`search` diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/inference.md b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/inference.md new file mode 100644 index 0000000..a9bd9b0 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/inference.md @@ -0,0 +1,127 @@ +## 使用预训练模型推理 + +我们提供测试脚本来评估完整数据集(Cityscapes, PASCAL VOC, ADE20k 等)上的结果,同时为了使其他项目的整合更容易,也提供一些高级 API。 + +### 测试一个数据集 + +- 单卡 GPU +- CPU +- 单节点多卡 GPU +- 多节点 + +您可以使用以下命令来测试一个数据集。 + +```shell +# 单卡 GPU 测试 +python tools/test.py ${配置文件} ${检查点文件} [--out ${结果文件}] [--eval ${评估指标}] [--show] + +# CPU: 如果机器没有 GPU, 则跟上述单卡 GPU 测试一致 +# CPU: 如果机器有 GPU, 那么先禁用 GPU 再运行单 GPU 测试脚本 +export CUDA_VISIBLE_DEVICES=-1 # 禁用 GPU +python tools/test.py ${配置文件} ${检查点文件} [--out ${结果文件}] [--eval ${评估指标}] [--show] + +# 多卡GPU 测试 +./tools/dist_test.sh ${配置文件} ${检查点文件} ${GPU数目} [--out ${结果文件}] [--eval ${评估指标}] +``` + +可选参数: + +- `RESULT_FILE`: pickle 格式的输出结果的文件名,如果不专门指定,结果将不会被专门保存成文件。(MMseg v0.17 之后,args.out 将只会保存评估时的中间结果或者是分割图的保存路径。) +- `EVAL_METRICS`: 在结果里将被评估的指标。这主要取决于数据集, `mIoU` 对于所有数据集都可获得,像 Cityscapes 数据集可以通过 `cityscapes` 命令来专门评估,就像标准的 `mIoU`一样。 +- `--show`: 如果被指定,分割结果将会在一张图像里画出来并且在另一个窗口展示。它仅仅是用来调试与可视化,并且仅针对单卡 GPU 测试。请确认 GUI 在您的环境里可用,否则您也许会遇到报错 `cannot connect to X server` +- `--show-dir`: 如果被指定,分割结果将会在一张图像里画出来并且保存在指定文件夹里。它仅仅是用来调试与可视化,并且仅针对单卡GPU测试。使用该参数时,您的环境不需要 GUI。 +- `--eval-options`: 评估时的可选参数,当设置 `efficient_test=True` 时,它将会保存中间结果至本地文件里以节约 CPU 内存。请确认您本地硬盘有足够的存储空间(大于20GB)。(MMseg v0.17 之后,`efficient_test` 不再生效,我们重构了 test api,通过使用一种渐近式的方式来提升评估和保存结果的效率。) + +例子: + +假设您已经下载检查点文件至文件夹 `checkpoints/` 里。 + +1. 测试 PSPNet 并可视化结果。按下任何键会进行到下一张图 + + ```shell + python tools/test.py configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \ + checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth \ + --show + ``` + +2. 测试 PSPNet 并保存画出的图以便于之后的可视化 + + ```shell + python tools/test.py configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \ + checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth \ + --show-dir psp_r50_512x1024_40ki_cityscapes_results + ``` + +3. 在数据集 PASCAL VOC (不保存测试结果) 上测试 PSPNet 并评估 mIoU + + ```shell + python tools/test.py configs/pspnet/pspnet_r50-d8_512x1024_20k_voc12aug.py \ + checkpoints/pspnet_r50-d8_512x1024_20k_voc12aug_20200605_003338-c57ef100.pth \ + --eval mAP + ``` + +4. 使用4卡 GPU 测试 PSPNet,并且在标准 mIoU 和 cityscapes 指标里评估模型 + + ```shell + ./tools/dist_test.sh configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \ + checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth \ + 4 --out results.pkl --eval mIoU cityscapes + ``` + + 注意:在 cityscapes mIoU 和我们的 mIoU 指标会有一些差异 (~0.1%) 。因为 cityscapes 默认是根据类别样本数的多少进行加权平均,而我们对所有的数据集都是采取直接平均的方法来得到 mIoU。 + +5. 
在 cityscapes 数据集上4卡 GPU 测试 PSPNet, 并生成 png 文件以便提交给官方评估服务器 + + 首先,在配置文件里添加内容: `configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py`, + + ```python + data = dict( + test=dict( + img_dir='leftImg8bit/test', + ann_dir='gtFine/test')) + ``` + + 随后,进行测试。 + + ```shell + ./tools/dist_test.sh configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \ + checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth \ + 4 --format-only --eval-options "imgfile_prefix=./pspnet_test_results" + ``` + + 您会在文件夹 `./pspnet_test_results` 里得到生成的 png 文件。 + 您也许可以运行 `zip -r results.zip pspnet_test_results/` 并提交 zip 文件给 [evaluation server](https://www.cityscapes-dataset.com/submit/) 。 + +6. 在 Cityscapes 数据集上使用 CPU 高效内存选项来测试 DeeplabV3+ `mIoU` 指标 (没有保存测试结果) + + ```shell + python tools/test.py \ + configs/deeplabv3plus/deeplabv3plus_r18-d8_512x1024_80k_cityscapes.py \ + deeplabv3plus_r18-d8_512x1024_80k_cityscapes_20201226_080942-cff257fe.pth \ + --eval-options efficient_test=True \ + --eval mIoU + ``` + + 使用 `pmap` 可查看 CPU 内存情况, `efficient_test=True` 会使用约 2.25GB 的 CPU 内存, `efficient_test=False` 会使用约 11.06GB 的 CPU 内存。 这个可选参数可以节约很多 CPU 内存。(MMseg v0.17 之后, `efficient_test` 参数将不再生效, 我们使用了一种渐近的方式来更加有效快速地评估和保存结果。) + +7. 在 LoveDA 数据集上1卡 GPU 测试 PSPNet, 并生成 png 文件以便提交给官方评估服务器 + + 首先,在配置文件里添加内容: `configs/pspnet/pspnet_r50-d8_512x512_80k_loveda.py`, + + ```python + data = dict( + test=dict( + img_dir='img_dir/test', + ann_dir='ann_dir/test')) + ``` + + 随后,进行测试。 + + ```shell + python ./tools/test.py configs/pspnet/pspnet_r50-d8_512x512_80k_loveda.py \ + checkpoints/pspnet_r50-d8_512x512_80k_loveda_20211104_155728-88610f9f.pth \ + --format-only --eval-options "imgfile_prefix=./pspnet_test_results" + ``` + + 您会在文件夹 `./pspnet_test_results` 里得到生成的 png 文件。 + 您也许可以运行 `zip -r -j Results.zip pspnet_test_results/` 并提交 zip 文件给 [evaluation server](https://codalab.lisn.upsaclay.fr/competitions/421) 。 diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/make.bat b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/make.bat new file mode 100644 index 0000000..922152e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/model_zoo.md b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/model_zoo.md new file mode 100644 index 0000000..b9a0986 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/model_zoo.md @@ -0,0 +1,152 @@ +# 标准与模型库 + +## 共同设定 + +- 我们默认使用 4 卡分布式训练 +- 所有 PyTorch 风格的 ImageNet 预训练网络由我们自己训练,和 [论文](https://arxiv.org/pdf/1812.01187.pdf) 保持一致。 + 我们的 ResNet 网络是基于 ResNetV1c 的变种,在这里输入层的 7x7 卷积被 3个 3x3 取代 +- 为了在不同的硬件上保持一致,我们以 `torch.cuda.max_memory_allocated()` 的最大值作为 GPU 占用率,同时设置 `torch.backends.cudnn.benchmark=False`。 + 注意,这通常比 `nvidia-smi` 显示的要少 +- 我们以网络 forward 和后处理的时间加和作为推理时间,除去数据加载时间。我们使用脚本 `tools/benchmark.py` 来获取推理时间,它在 `torch.backends.cudnn.benchmark=False` 的设定下,计算 200 张图片的平均推理时间 +- 在框架中,有两种推理模式 + - `slide` 模式(滑动模式):测试的配置文件字段 `test_cfg` 会是 `dict(mode='slide', crop_size=(769, 769), stride=(513, 513))`. + 在这个模式下,从原图中裁剪多个小图分别输入网络中进行推理。小图的大小和小图之间的距离由 `crop_size` 和 `stride` 决定,重合区域会进行平均 + - `whole` 模式 (全图模式):测试的配置文件字段 `test_cfg` 会是 `dict(mode='whole')`. 在这个模式下,全图会被直接输入到网络中进行推理。 + 对于 769x769 下训练的模型,我们默认使用 `slide` 进行推理,其余模型用 `whole` 进行推理 +- 对于输入大小为 8x+1 (比如769),我们使用 `align_corners=True`。其余情况,对于输入大小为 8x (比如 512,1024),我们使用 `align_corners=False` + +## 基线 + +### FCN + +请参考 [FCN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fcn) 获得详细信息。 + +### PSPNet + +请参考 [PSPNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/pspnet) 获得详细信息。 + +### DeepLabV3 + +请参考 [DeepLabV3](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3) 获得详细信息。 + +### PSANet + +请参考 [PSANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/psanet) 获得详细信息。 + +### DeepLabV3+ + +请参考 [DeepLabV3+](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/deeplabv3plus) 获得详细信息。 + +### UPerNet + +请参考 [UPerNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/upernet) 获得详细信息。 + +### NonLocal Net + +请参考 [NonLocal Net](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/nlnet) 获得详细信息。 + +### EncNet + +请参考 [EncNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/encnet) 获得详细信息。 + +### CCNet + +请参考 [CCNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ccnet) 获得详细信息。 + +### DANet + +请参考 [DANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/danet) 获得详细信息。 + +### APCNet + +请参考 [APCNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/apcnet) 获得详细信息。 + +### HRNet + +请参考 [HRNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/hrnet) 获得详细信息。 + +### GCNet + +请参考 [GCNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/gcnet) 获得详细信息。 + +### DMNet + +请参考 [DMNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dmnet) 获得详细信息。 + +### ANN + +请参考 [ANN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ann) 获得详细信息。 + +### OCRNet + +请参考 [OCRNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/ocrnet) 获得详细信息。 + +### Fast-SCNN + +请参考 [Fast-SCNN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/fastscnn) 获得详细信息。 + +### ResNeSt + +请参考 [ResNeSt](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/resnest) 获得详细信息。 + +### Semantic 
FPN + +请参考 [Semantic FPN](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/semfpn) 获得详细信息。 + +### PointRend + +请参考 [PointRend](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/point_rend) 获得详细信息。 + +### MobileNetV2 + +请参考 [MobileNetV2](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v2) 获得详细信息。 + +### MobileNetV3 + +请参考 [MobileNetV3](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/mobilenet_v3) 获得详细信息。 + +### EMANet + +请参考 [EMANet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/emanet) 获得详细信息。 + +### DNLNet + +请参考 [DNLNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/dnlnet) 获得详细信息。 + +### CGNet + +请参考 [CGNet](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/cgnet) 获得详细信息。 + +### Mixed Precision (FP16) Training + +请参考 [Mixed Precision (FP16) Training 在 BiSeNetV2 训练的样例](https://github.com/open-mmlab/mmsegmentation/blob/master/configs/bisenetv2/bisenetv2_fcn_fp16_4x4_1024x1024_160k_cityscapes.py) 获得详细信息。 + +## 速度标定 + +### 硬件 + +- 8 NVIDIA Tesla V100 (32G) GPUs +- Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz + +### 软件环境 + +- Python 3.7 +- PyTorch 1.5 +- CUDA 10.1 +- CUDNN 7.6.03 +- NCCL 2.4.08 + +### 训练速度 + +为了公平比较,我们全部使用 ResNet-101V1c 进行标定。输入大小为 1024x512,批量样本数为 2。 + +训练速度如下表,指标为每次迭代的时间,以秒为单位,越低越快。 + +| Implementation | PSPNet (s/iter) | DeepLabV3+ (s/iter) | +| --------------------------------------------------------------------------- | --------------- | ------------------- | +| [MMSegmentation](https://github.com/open-mmlab/mmsegmentation) | **0.83** | **0.85** | +| [SegmenTron](https://github.com/LikeLy-Journey/SegmenTron) | 0.84 | 0.85 | +| [CASILVision](https://github.com/CSAILVision/semantic-segmentation-pytorch) | 1.15 | N/A | +| [vedaseg](https://github.com/Media-Smart/vedaseg) | 0.95 | 1.25 | + +注意:DeepLabV3+ 的输出步长为 8。 diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/stat.py b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/stat.py new file mode 100644 index 0000000..b3a1d73 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/stat.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python +# Copyright (c) OpenMMLab. All rights reserved. 
+import functools as func
+import glob
+import os.path as osp
+import re
+
+import numpy as np
+
+url_prefix = 'https://github.com/open-mmlab/mmsegmentation/blob/master/'
+
+files = sorted(glob.glob('../../configs/*/README.md'))
+
+stats = []
+titles = []
+num_ckpts = 0
+
+for f in files:
+    url = osp.dirname(f.replace('../../', url_prefix))
+
+    with open(f, 'r') as content_file:
+        content = content_file.read()
+
+    title = content.split('\n')[0].replace('#', '').strip()
+    ckpts = set(x.lower().strip()
+                for x in re.findall(r'https?://download.*\.pth', content)
+                if 'mmsegmentation' in x)
+    if len(ckpts) == 0:
+        continue
+
+    # 各 README 中的论文类型标记形如 <!-- [ALGORITHM] -->,
+    # 此处提取方括号内的类型名。
+    _papertype = [
+        x for x in re.findall(r'<!--\s*\[([A-Z]*?)\]\s*-->', content)
+    ]
+    assert len(_papertype) > 0
+    papertype = _papertype[0]
+
+    paper = set([(papertype, title)])
+
+    titles.append(title)
+    num_ckpts += len(ckpts)
+    statsmsg = f"""
+\t* [{papertype}] [{title}]({url}) ({len(ckpts)} ckpts)
+"""
+    stats.append((paper, ckpts, statsmsg))
+
+allpapers = func.reduce(lambda a, b: a.union(b), [p for p, _, _ in stats])
+msglist = '\n'.join(x for _, _, x in stats)
+
+papertypes, papercounts = np.unique([t for t, _ in allpapers],
+                                    return_counts=True)
+countstr = '\n'.join(
+    [f'   - {t}: {c}' for t, c in zip(papertypes, papercounts)])
+
+modelzoo = f"""
+# 模型库统计数据
+
+* 论文数量: {len(set(titles))}
+{countstr}
+
+* 模型数量: {num_ckpts}
+{msglist}
+"""
+
+with open('modelzoo_statistics.md', 'w') as f:
+    f.write(modelzoo)
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/switch_language.md b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/switch_language.md
new file mode 100644
index 0000000..f58efc4
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/switch_language.md
@@ -0,0 +1,3 @@
+## <a href='https://mmsegmentation.readthedocs.io/en/latest/'>English</a>
+
+## <a href='https://mmsegmentation.readthedocs.io/zh_CN/latest/'>简体中文</a>
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/train.md b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/train.md
new file mode 100644
index 0000000..a54f28f
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/train.md
@@ -0,0 +1,159 @@
+## 训练一个模型
+
+MMSegmentation 支持分布式训练和非分布式训练,分别通过 `MMDistributedDataParallel` 和 `MMDataParallel` 实现。
+
+所有的输出(日志 log 和检查点 checkpoints)将被保存到工作路径文件夹里,它可以通过配置文件里的 `work_dir` 指定。
+
+在一定迭代轮次后,我们默认在验证集上评估模型表现。您可以在训练配置文件中添加间隔参数来改变评估间隔。
+
+```python
+evaluation = dict(interval=4000)  # 每 4000 iterations 评估一次模型的性能
+```
+
+**\*重要提示\***: 在配置文件里的默认学习率是针对4卡 GPU 和2张图/GPU (此时 batchsize = 4x2 = 8)来设置的。
+同样,您也可以使用8卡 GPU 和 1张图/GPU 的设置,因为所有的模型均使用 cross-GPU 的 SyncBN 模式。
+
+我们可以在训练速度和 GPU 显存之间做平衡。当模型或者 Batch Size 比较大时,可以传递 `--cfg-options model.backbone.with_cp=True`,使用 `with_cp` 来节省显存,但是速度会更慢,因为使用 `with_cp` 时不会保存所有的中间结果,反向传播(Back Propagation, BP)时需要重新计算。
+
+### 使用单台机器训练
+
+#### 使用单卡 GPU 训练
+
+```shell
+python tools/train.py ${CONFIG_FILE} [可选参数]
+```
+
+如果您想在命令里定义工作文件夹路径,您可以添加一个参数 `--work-dir ${工作路径}`。
+
+#### 使用 CPU 训练
+
+如果计算机没有 GPU,那么使用 CPU 训练的流程和使用单 GPU 训练的流程一致。如果计算机有 GPU 但是想使用 CPU,我们仅需要在训练流程开始前禁用 GPU。
+
+```shell
+export CUDA_VISIBLE_DEVICES=-1
+```
+
+之后运行单 GPU 训练脚本即可。
+
+```{warning}
+我们不推荐用户使用 CPU 进行训练,这太过缓慢。我们支持这个功能是为了方便用户在没有 GPU 的机器上进行调试。
+```
+
+#### 使用多卡 GPU 训练
+
+```shell
+sh tools/dist_train.sh ${CONFIG_FILE} ${GPUS} [可选参数]
+```
+
+可选参数可以为:
+
+- `--no-validate` (**不推荐**): 训练时代码库默认会在每 k 轮迭代后在验证集上进行评估,如果不需评估使用命令 `--no-validate`
+- `--work-dir ${工作路径}`: 在配置文件里重写工作路径文件夹
+- `--resume-from ${检查点文件}`: 继续使用先前的检查点 (checkpoint) 文件(可以继续训练过程)
+- `--load-from ${检查点文件}`: 从一个检查点 (checkpoint) 文件里加载权重(对另一个任务进行精调)
+- `--deterministic`: 选择此模式会减慢训练速度,但结果易于复现
+
+`resume-from` 和 
`load-from` 的区别: + +- `resume-from` 加载出模型权重和优化器状态包括迭代轮数等 +- `load-from` 仅加载模型权重,从第0轮开始训练 + +示例: + +```shell +# 模型的权重和日志将会存储在这个路径下: WORK_DIR=work_dirs/pspnet_r50-d8_512x512_80k_ade20k/ +# 如果work_dir没有被设定,它将会被自动生成 +sh tools/dist_train.sh configs/pspnet/pspnet_r50-d8_512x512_80k_ade20k.py 8 --work_dir work_dirs/pspnet_r50-d8_512x512_80k_ade20k/ --deterministic +``` + +**注意**: 在训练时,模型的和日志保存在“work_dirs/”下的配置文件的相同文件夹结构中。不建议使用自定义的“work_dirs/”,因为验证脚本可以从配置文件名中推断工作目录。如果你想在其他地方保存模型的权重,请使用符号链接,例如: + +```shell +ln -s ${YOUR_WORK_DIRS} ${MMSEG}/work_dirs +``` + +#### 在单个机器上启动多个任务 + +如果您在单个机器上启动多个任务,例如在8卡 GPU 的一个机器上有2个4卡 GPU 的训练任务,您需要特别对每个任务指定不同的端口(默认为29500)来避免通讯冲突。否则,将会有报错信息 `RuntimeError: Address already in use`。 + +如果您使用命令 `dist_train.sh` 来启动一个训练任务,您可以在命令行的用环境变量 `PORT` 设置端口: + +```shell +CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 sh tools/dist_train.sh ${CONFIG_FILE} 4 +CUDA_VISIBLE_DEVICES=4,5,6,7 PORT=29501 sh tools/dist_train.sh ${CONFIG_FILE} 4 +``` + +### 使用多台机器训练 + +如果您想使用由 ethernet 连接起来的多台机器, 您可以使用以下命令: + +在第一台机器上: + +```shell +NNODES=2 NODE_RANK=0 PORT=$MASTER_PORT MASTER_ADDR=$MASTER_ADDR sh tools/dist_train.sh $CONFIG $GPUS +``` + +在第二台机器上: + +```shell +NNODES=2 NODE_RANK=1 PORT=$MASTER_PORT MASTER_ADDR=$MASTER_ADDR sh tools/dist_train.sh $CONFIG $GPUS +``` + +但是,如果您不使用高速网路连接这几台机器的话,训练将会非常慢。 + +### 使用slurm管理任务 + +Slurm是一个很好的计算集群作业调度系统。在由Slurm管理的集群中,可以使用slurm_train.sh来进行训练。它同时支持单节点和多节点训练。 + +在多台机器上训练: + +```shell +[GPUS=${GPUS}] sh tools/slurm_train.sh ${PARTITION} ${JOB_NAME} ${CONFIG_FILE} --work-dir ${WORK_DIR} +``` + +这里有一个在dev分区上使用16块GPUs来训练PSPNet的例子: + +```shell +GPUS=16 sh tools/slurm_train.sh dev pspr50 configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py work_dirs/pspnet_r50-d8_512x1024_40k_cityscapes/ +``` + +当使用 `slurm_train.sh` 在一个节点上启动多个任务时,需要指定不同的端口号,这里提供了三种设置: + +方式1: + +在`config1.py`中设置: + +```python +dist_params = dict(backend='nccl', port=29500) +``` + +在`config2.py`中设置: + +```python +dist_params = dict(backend='nccl', port=29501) +``` + +然后就可以使用config1.py和config2.py启动两个作业: + +```shell +CUDA_VISIBLE_DEVICES=0,1,2,3 GPUS=4 sh tools/slurm_train.sh ${PARTITION} ${JOB_NAME} config1.py tmp_work_dir_1 +CUDA_VISIBLE_DEVICES=4,5,6,7 GPUS=4 sh tools/slurm_train.sh ${PARTITION} ${JOB_NAME} config2.py tmp_work_dir_2 +``` + +方式2: + +您可以设置不同的通信端口,而不需要修改配置文件,但必须设置“cfg-options”,以覆盖配置文件中的默认端口。 + +```shell +CUDA_VISIBLE_DEVICES=0,1,2,3 GPUS=4 sh tools/slurm_train.sh ${PARTITION} ${JOB_NAME} config1.py tmp_work_dir_1 --cfg-options dist_params.port=29500 +CUDA_VISIBLE_DEVICES=4,5,6,7 GPUS=4 sh tools/slurm_train.sh ${PARTITION} ${JOB_NAME} config2.py tmp_work_dir_2 --cfg-options dist_params.port=29501 +``` + +方式3: + +您可以使用环境变量’ MASTER_PORT ‘在命令中设置端口: + +```shell +CUDA_VISIBLE_DEVICES=0,1,2,3 GPUS=4 MASTER_PORT=29500 sh tools/slurm_train.sh ${PARTITION} ${JOB_NAME} config1.py tmp_work_dir_1 +CUDA_VISIBLE_DEVICES=4,5,6,7 GPUS=4 MASTER_PORT=29501 sh tools/slurm_train.sh ${PARTITION} ${JOB_NAME} config2.py tmp_work_dir_2 +``` diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/tutorials/config.md b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/tutorials/config.md new file mode 100644 index 0000000..7cee611 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/tutorials/config.md @@ -0,0 +1,377 @@ +# 教程 1: 学习配置文件 + +我们整合了模块和继承设计到我们的配置里,这便于做很多实验。如果您想查看配置文件,您可以运行 `python tools/print_config.py /PATH/TO/CONFIG` 去查看完整的配置文件。您还可以传递参数 +`--cfg-options xxx.yyy=zzz` 去查看更新的配置。 + +## 配置文件的结构 + +在 `config/_base_` 文件夹下面有4种基本组件类型: 
数据集(dataset),模型(model),训练策略(schedule)和运行时的默认设置(default runtime)。许多方法都可以方便地通过组合这些组件进行实现。 +这样,像 DeepLabV3, PSPNet 这样的模型可以容易地被构造。被来自 `_base_` 下的组件来构建的配置叫做 _原始配置 (primitive)_。 + +对于所有在同一个文件夹下的配置文件,推荐**只有一个**对应的**原始配置**文件。所有其他的配置文件都应该继承自这个**原始配置**文件。这样就能保证配置文件的最大继承深度为 3。 + +为了便于理解,我们推荐社区贡献者继承已有的方法配置文件。 +例如,如果一些修改是基于 DeepLabV3,使用者首先首先应该通过指定 `_base_ = ../deeplabv3/deeplabv3_r50_512x1024_40ki_cityscapes.py`来继承基础 DeepLabV3 结构,再去修改配置文件里其他内容以完成继承。 + +如果您正在构建一个完整的新模型,它完全没有和已有的方法共享一些结构,您可能需要在 `configs` 下面创建一个文件夹 `xxxnet`。 +更详细的文档,请参照 [mmcv](https://mmcv.readthedocs.io/en/latest/understand_mmcv/config.html) 。 + +## 配置文件命名风格 + +我们按照下面的风格去命名配置文件,社区贡献者被建议使用同样的风格。 + +``` +{model}_{backbone}_[misc]_[gpu x batch_per_gpu]_{resolution}_{iterations}_{dataset} +``` + +`{xxx}` 是被要求的文件 `[yyy]` 是可选的。 + +- `{model}`: 模型种类,例如 `psp`, `deeplabv3` 等等 +- `{backbone}`: 主干网络种类,例如 `r50` (ResNet-50), `x101` (ResNeXt-101) +- `[misc]`: 模型中各式各样的设置/插件,例如 `dconv`, `gcb`, `attention`, `mstrain` +- `[gpu x batch_per_gpu]`: GPU数目 和每个 GPU 的样本数, 默认为 `8x2` +- `{iterations}`: 训练迭代轮数,如`160k` +- `{dataset}`: 数据集,如 `cityscapes`, `voc12aug`, `ade` + +## PSPNet 的一个例子 + +为了帮助使用者熟悉这个流行的语义分割框架的完整配置文件和模块,我们在下面对使用 ResNet50V1c 的 PSPNet 的配置文件做了详细的注释说明。 +更多的详细使用和其他模块的替代项请参考 API 文档。 + +```python +norm_cfg = dict(type='SyncBN', requires_grad=True) # 分割框架通常使用 SyncBN +model = dict( + type='EncoderDecoder', # 分割器(segmentor)的名字 + pretrained='open-mmlab://resnet50_v1c', # 将被加载的 ImageNet 预训练主干网络 + backbone=dict( + type='ResNetV1c', # 主干网络的类别。 可用选项请参考 mmseg/models/backbones/resnet.py + depth=50, # 主干网络的深度。通常为 50 和 101。 + num_stages=4, # 主干网络状态(stages)的数目,这些状态产生的特征图作为后续的 head 的输入。 + out_indices=(0, 1, 2, 3), # 每个状态产生的特征图输出的索引。 + dilations=(1, 1, 2, 4), # 每一层(layer)的空心率(dilation rate)。 + strides=(1, 2, 1, 1), # 每一层(layer)的步长(stride)。 + norm_cfg=dict( # 归一化层(norm layer)的配置项。 + type='SyncBN', # 归一化层的类别。通常是 SyncBN。 + requires_grad=True), # 是否训练归一化里的 gamma 和 beta。 + norm_eval=False, # 是否冻结 BN 里的统计项。 + style='pytorch', # 主干网络的风格,'pytorch' 意思是步长为2的层为 3x3 卷积, 'caffe' 意思是步长为2的层为 1x1 卷积。 + contract_dilation=True), # 当空洞 > 1, 是否压缩第一个空洞层。 + decode_head=dict( + type='PSPHead', # 解码头(decode head)的类别。 可用选项请参考 mmseg/models/decode_heads。 + in_channels=2048, # 解码头的输入通道数。 + in_index=3, # 被选择的特征图(feature map)的索引。 + channels=512, # 解码头中间态(intermediate)的通道数。 + pool_scales=(1, 2, 3, 6), # PSPHead 平均池化(avg pooling)的规模(scales)。 细节请参考文章内容。 + dropout_ratio=0.1, # 进入最后分类层(classification layer)之前的 dropout 比例。 + num_classes=19, # 分割前景的种类数目。 通常情况下,cityscapes 为19,VOC为21,ADE20k 为150。 + norm_cfg=dict(type='SyncBN', requires_grad=True), # 归一化层的配置项。 + align_corners=False, # 解码里调整大小(resize)的 align_corners 参数。 + loss_decode=dict( # 解码头(decode_head)里的损失函数的配置项。 + type='CrossEntropyLoss', # 在分割里使用的损失函数的类别。 + use_sigmoid=False, # 在分割里是否使用 sigmoid 激活。 + loss_weight=1.0)), # 解码头里损失的权重。 + auxiliary_head=dict( + type='FCNHead', # 辅助头(auxiliary head)的种类。可用选项请参考 mmseg/models/decode_heads。 + in_channels=1024, # 辅助头的输入通道数。 + in_index=2, # 被选择的特征图(feature map)的索引。 + channels=256, # 辅助头中间态(intermediate)的通道数。 + num_convs=1, # FCNHead 里卷积(convs)的数目. 
辅助头里通常为1。 + concat_input=False, # 在分类层(classification layer)之前是否连接(concat)输入和卷积的输出。 + dropout_ratio=0.1, # 进入最后分类层(classification layer)之前的 dropout 比例。 + num_classes=19, # 分割前景的种类数目。 通常情况下,cityscapes 为19,VOC为21,ADE20k 为150。 + norm_cfg=dict(type='SyncBN', requires_grad=True), # 归一化层的配置项。 + align_corners=False, # 解码里调整大小(resize)的 align_corners 参数。 + loss_decode=dict( # 辅助头(auxiliary head)里的损失函数的配置项。 + type='CrossEntropyLoss', # 在分割里使用的损失函数的类别。 + use_sigmoid=False, # 在分割里是否使用 sigmoid 激活。 + loss_weight=0.4))) # 辅助头里损失的权重。默认设置为0.4。 +train_cfg = dict() # train_cfg 当前仅是一个占位符。 +test_cfg = dict(mode='whole') # 测试模式, 选项是 'whole' 和 'sliding'. 'whole': 整张图像全卷积(fully-convolutional)测试。 'sliding': 图像上做滑动裁剪窗口(sliding crop window)。 +dataset_type = 'CityscapesDataset' # 数据集类型,这将被用来定义数据集。 +data_root = 'data/cityscapes/' # 数据的根路径。 +img_norm_cfg = dict( # 图像归一化配置,用来归一化输入的图像。 + mean=[123.675, 116.28, 103.53], # 预训练里用于预训练主干网络模型的平均值。 + std=[58.395, 57.12, 57.375], # 预训练里用于预训练主干网络模型的标准差。 + to_rgb=True) # 预训练里用于预训练主干网络的图像的通道顺序。 +crop_size = (512, 1024) # 训练时的裁剪大小 +train_pipeline = [ #训练流程 + dict(type='LoadImageFromFile'), # 第1个流程,从文件路径里加载图像。 + dict(type='LoadAnnotations'), # 第2个流程,对于当前图像,加载它的注释信息。 + dict(type='Resize', # 变化图像和其注释大小的数据增广的流程。 + img_scale=(2048, 1024), # 图像的最大规模。 + ratio_range=(0.5, 2.0)), # 数据增广的比例范围。 + dict(type='RandomCrop', # 随机裁剪当前图像和其注释大小的数据增广的流程。 + crop_size=(512, 1024), # 随机裁剪图像生成 patch 的大小。 + cat_max_ratio=0.75), # 单个类别可以填充的最大区域的比例。 + dict( + type='RandomFlip', # 翻转图像和其注释大小的数据增广的流程。 + flip_ratio=0.5), # 翻转图像的概率 + dict(type='PhotoMetricDistortion'), # 光学上使用一些方法扭曲当前图像和其注释的数据增广的流程。 + dict( + type='Normalize', # 归一化当前图像的数据增广的流程。 + mean=[123.675, 116.28, 103.53], # 这些键与 img_norm_cfg 一致,因为 img_norm_cfg 被 + std=[58.395, 57.12, 57.375], # 用作参数。 + to_rgb=True), + dict(type='Pad', # 填充当前图像到指定大小的数据增广的流程。 + size=(512, 1024), # 填充的图像大小。 + pad_val=0, # 图像的填充值。 + seg_pad_val=255), # 'gt_semantic_seg'的填充值。 + dict(type='DefaultFormatBundle'), # 流程里收集数据的默认格式捆。 + dict(type='Collect', # 决定数据里哪些键被传递到分割器里的流程。 + keys=['img', 'gt_semantic_seg']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), # 第1个流程,从文件路径里加载图像。 + dict( + type='MultiScaleFlipAug', # 封装测试时数据增广(test time augmentations)。 + img_scale=(2048, 1024), # 决定测试时可改变图像的最大规模。用于改变图像大小的流程。 + flip=False, # 测试时是否翻转图像。 + transforms=[ + dict(type='Resize', # 使用改变图像大小的数据增广。 + keep_ratio=True), # 是否保持宽和高的比例,这里的图像比例设置将覆盖上面的图像规模大小的设置。 + dict(type='RandomFlip'), # 考虑到 RandomFlip 已经被添加到流程里,当 flip=False 时它将不被使用。 + dict( + type='Normalize', # 归一化配置项,值来自 img_norm_cfg。 + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True), + dict(type='ImageToTensor', # 将图像转为张量 + keys=['img']), + dict(type='Collect', # 收集测试时必须的键的收集流程。 + keys=['img']) + ]) +] +data = dict( + samples_per_gpu=2, # 单个 GPU 的 Batch size + workers_per_gpu=2, # 单个 GPU 分配的数据加载线程数 + train=dict( # 训练数据集配置 + type='CityscapesDataset', # 数据集的类别, 细节参考自 mmseg/datasets/。 + data_root='data/cityscapes/', # 数据集的根目录。 + img_dir='leftImg8bit/train', # 数据集图像的文件夹。 + ann_dir='gtFine/train', # 数据集注释的文件夹。 + pipeline=[ # 流程, 由之前创建的 train_pipeline 传递进来。 + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict( + type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=(512, 1024), cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict( + type='Normalize', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True), + dict(type='Pad', size=(512, 1024), pad_val=0, 
seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']) + ]), + val=dict( # 验证数据集的配置 + type='CityscapesDataset', + data_root='data/cityscapes/', + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=[ # 由之前创建的 test_pipeline 传递的流程。 + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ]), + test=dict( + type='CityscapesDataset', + data_root='data/cityscapes/', + img_dir='leftImg8bit/val', + ann_dir='gtFine/val', + pipeline=[ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict( + type='Normalize', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']) + ]) + ])) +log_config = dict( # 注册日志钩 (register logger hook) 的配置文件。 + interval=50, # 打印日志的间隔 + hooks=[ + # dict(type='TensorboardLoggerHook') # 同样支持 Tensorboard 日志 + dict(type='TextLoggerHook', by_epoch=False) + ]) +dist_params = dict(backend='nccl') # 用于设置分布式训练的参数,端口也同样可被设置。 +log_level = 'INFO' # 日志的级别。 +load_from = None # 从一个给定路径里加载模型作为预训练模型,它并不会消耗训练时间。 +resume_from = None # 从给定路径里恢复检查点(checkpoints),训练模式将从检查点保存的轮次开始恢复训练。 +workflow = [('train', 1)] # runner 的工作流程。 [('train', 1)] 意思是只有一个工作流程而且工作流程 'train' 仅执行一次。根据 `runner.max_iters` 工作流程训练模型的迭代轮数为40000次。 +cudnn_benchmark = True # 是否是使用 cudnn_benchmark 去加速,它对于固定输入大小的可以提高训练速度。 +optimizer = dict( # 用于构建优化器的配置文件。支持 PyTorch 中的所有优化器,同时它们的参数与PyTorch里的优化器参数一致。 + type='SGD', # 优化器种类,更多细节可参考 https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/optimizer/default_constructor.py#L13。 + lr=0.01, # 优化器的学习率,参数的使用细节请参照对应的 PyTorch 文档。 + momentum=0.9, # 动量 (Momentum) + weight_decay=0.0005) # SGD 的衰减权重 (weight decay)。 +optimizer_config = dict() # 用于构建优化器钩 (optimizer hook) 的配置文件,执行细节请参考 https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/optimizer.py#L8。 +lr_config = dict( + policy='poly', # 调度流程的策略,同样支持 Step, CosineAnnealing, Cyclic 等. 
请从 https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py#L9 参考 LrUpdater 的细节。 + power=0.9, # 多项式衰减 (polynomial decay) 的幂。 + min_lr=0.0001, # 用来稳定训练的最小学习率。 + by_epoch=False) # 是否按照每个 epoch 去算学习率。 +runner = dict( + type='IterBasedRunner', # 将使用的 runner 的类别 (例如 IterBasedRunner 或 EpochBasedRunner)。 + max_iters=40000) # 全部迭代轮数大小,对于 EpochBasedRunner 使用 `max_epochs` 。 +checkpoint_config = dict( # 设置检查点钩子 (checkpoint hook) 的配置文件。执行时请参考 https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/checkpoint.py。 + by_epoch=False, # 是否按照每个 epoch 去算 runner。 + interval=4000) # 保存的间隔 +evaluation = dict( # 构建评估钩 (evaluation hook) 的配置文件。细节请参考 mmseg/core/evaluation/eval_hook.py。 + interval=4000, # 评估的间歇点 + metric='mIoU') # 评估的指标 + + +``` + +## FAQ + +### 忽略基础配置文件里的一些域内容。 + +有时,您也许会设置 `_delete_=True` 去忽略基础配置文件里的一些域内容。 +您也许可以参照 [mmcv](https://mmcv.readthedocs.io/en/latest/understand_mmcv/config.html#inherit-from-base-config-with-ignored-fields) 来获得一些简单的指导。 + +在 MMSegmentation 里,例如为了改变 PSPNet 的主干网络的某些内容: + +```python +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='MaskRCNN', + pretrained='torchvision://resnet50', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict(...), + auxiliary_head=dict(...)) +``` + +`ResNet` 和 `HRNet` 使用不同的关键词去构建。 + +```python +_base_ = '../pspnet/psp_r50_512x1024_40ki_cityscpaes.py' +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + pretrained='open-mmlab://msra/hrnetv2_w32', + backbone=dict( + _delete_=True, + type='HRNet', + norm_cfg=norm_cfg, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256)))), + decode_head=dict(...), + auxiliary_head=dict(...)) +``` + +`_delete_=True` 将用新的键去替换 `backbone` 域内所有老的键。 + +### 使用配置文件里的中间变量 + +配置文件里会使用一些中间变量,例如数据集里的 `train_pipeline`/`test_pipeline`。 +需要注意的是,在子配置文件里修改中间变量时,使用者需要再次传递这些变量给对应的域。 +例如,我们想改变在训练或测试时,PSPNet 的多尺度策略 (multi scale strategy),`train_pipeline`/`test_pipeline` 是我们想要修改的中间变量。 + +```python +_base_ = '../pspnet/psp_r50_512x1024_40ki_cityscapes.py' +crop_size = (512, 1024) +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 1024), ratio_range=(1.0, 2.0)), # 改成 [1., 2.] 
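+    # 注意:中间变量不能被部分覆盖,一旦要改动 train_pipeline,
+    # 就需要像这里一样完整地重新定义整个列表,未改动的条目也要原样保留。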
+    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
+    dict(type='RandomFlip', flip_ratio=0.5),
+    dict(type='PhotoMetricDistortion'),
+    dict(type='Normalize', **img_norm_cfg),
+    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
+    dict(type='DefaultFormatBundle'),
+    dict(type='Collect', keys=['img', 'gt_semantic_seg']),
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(
+        type='MultiScaleFlipAug',
+        img_scale=(2048, 1024),
+        img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],  # 改成多尺度测试 (multi scale testing)。
+        flip=False,
+        transforms=[
+            dict(type='Resize', keep_ratio=True),
+            dict(type='RandomFlip'),
+            dict(type='Normalize', **img_norm_cfg),
+            dict(type='ImageToTensor', keys=['img']),
+            dict(type='Collect', keys=['img']),
+        ])
+]
+data = dict(
+    train=dict(pipeline=train_pipeline),
+    val=dict(pipeline=test_pipeline),
+    test=dict(pipeline=test_pipeline))
+```
+
+我们首先定义新的 `train_pipeline`/`test_pipeline`,然后把它们传递到 `data` 里。
+
+同样的,如果我们想从 `SyncBN` 切换到 `BN` 或者 `MMSyncBN`,我们需要替换配置文件里的每一个 `norm_cfg`。
+
+```python
+_base_ = '../pspnet/psp_r50_512x1024_40k_cityscapes.py'
+norm_cfg = dict(type='BN', requires_grad=True)
+model = dict(
+    backbone=dict(norm_cfg=norm_cfg),
+    decode_head=dict(norm_cfg=norm_cfg),
+    auxiliary_head=dict(norm_cfg=norm_cfg))
+```
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/tutorials/customize_datasets.md b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/tutorials/customize_datasets.md
new file mode 100644
index 0000000..2de1398
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/tutorials/customize_datasets.md
@@ -0,0 +1,209 @@
+# 教程 2: 自定义数据集
+
+## 通过重新组织数据来定制数据集
+
+最简单的方法是将您的数据集进行转化,并组织成文件夹的形式。
+
+如下的文件结构就是一个例子。
+
+```none
+├── data
+│   ├── my_dataset
+│   │   ├── img_dir
+│   │   │   ├── train
+│   │   │   │   ├── xxx{img_suffix}
+│   │   │   │   ├── yyy{img_suffix}
+│   │   │   │   ├── zzz{img_suffix}
+│   │   │   ├── val
+│   │   ├── ann_dir
+│   │   │   ├── train
+│   │   │   │   ├── xxx{seg_map_suffix}
+│   │   │   │   ├── yyy{seg_map_suffix}
+│   │   │   │   ├── zzz{seg_map_suffix}
+│   │   │   ├── val
+
+```
+
+一个训练对由 img_dir/ann_dir 里具有相同前缀的文件组成。
+
+如果给定 `split` 参数,则只有 img_dir/ann_dir 里的部分文件会被加载:我们可以在 split 文本里指定要包括的文件的前缀。
+
+例如,一个 split 文本如下所示:
+
+```none
+xxx
+zzz
+```
+
+只有
+
+`data/my_dataset/img_dir/train/xxx{img_suffix}`,
+`data/my_dataset/img_dir/train/zzz{img_suffix}`,
+`data/my_dataset/ann_dir/train/xxx{seg_map_suffix}`,
+`data/my_dataset/ann_dir/train/zzz{seg_map_suffix}` 将被加载。
+
+注意:标注的形状和图像相同,为 (H, W),其中像素值的范围是 `[0, num_classes - 1]`。您也可以使用 [pillow](https://pillow.readthedocs.io/en/stable/handbook/concepts.html#palette) 的 `'P'` 模式去创建包含颜色的标注。
+
+## 通过混合数据去定制数据集
+
+MMSegmentation 同样支持混合数据集去训练。当前它支持拼接 (concat)、重复 (repeat) 和多图混合 (multi-image mix) 数据集。
+
+### 重复数据集
+
+我们使用 `RepeatDataset` 作为包装 (wrapper) 去重复数据集。例如,假设原始数据集是 `Dataset_A`,为了重复它,配置文件如下:
+
+```python
+dataset_A_train = dict(
+    type='RepeatDataset',
+    times=N,
+    dataset=dict(  # 这是 Dataset_A 数据集的原始配置
+        type='Dataset_A',
+        ...
+        pipeline=train_pipeline
+    )
+    )
+```
+
+### 拼接数据集
+
+有 2 种方式去拼接数据集。
+
+1. 如果您想拼接的数据集是同样的类型,但有不同的标注文件,
+   您可以按如下操作去拼接数据集的配置文件:
+
+   1. 您可以拼接两个标注文件夹 `ann_dir`
+
+      ```python
+      dataset_A_train = dict(
+          type='Dataset_A',
+          img_dir = 'img_dir',
+          ann_dir = ['anno_dir_1', 'anno_dir_2'],
+          pipeline=train_pipeline
+      )
+      ```
+
+   2. 您也可以拼接两个 `split` 文件列表
+
+      ```python
+      dataset_A_train = dict(
+          type='Dataset_A',
+          img_dir = 'img_dir',
+          ann_dir = 'anno_dir',
+          split = ['split_1.txt', 'split_2.txt'],
+          pipeline=train_pipeline
+      )
+      ```
+
+   3.
您也可以同时拼接 `ann_dir` 文件夹和 `split` 文件列表 + + ```python + dataset_A_train = dict( + type='Dataset_A', + img_dir = 'img_dir', + ann_dir = ['anno_dir_1', 'anno_dir_2'], + split = ['split_1.txt', 'split_2.txt'], + pipeline=train_pipeline + ) + ``` + + 在这样的情况下, `ann_dir_1` 和 `ann_dir_2` 分别对应于 `split_1.txt` 和 `split_2.txt` + +2. 如果您想拼接不同的数据集,您可以如下去拼接数据集的配置文件: + + ```python + dataset_A_train = dict() + dataset_B_train = dict() + + data = dict( + imgs_per_gpu=2, + workers_per_gpu=2, + train = [ + dataset_A_train, + dataset_B_train + ], + val = dataset_A_val, + test = dataset_A_test + ) + ``` + +一个更复杂的例子如下:分别重复 `Dataset_A` 和 `Dataset_B` N 次和 M 次,然后再去拼接重复后的数据集 + +```python +dataset_A_train = dict( + type='RepeatDataset', + times=N, + dataset=dict( + type='Dataset_A', + ... + pipeline=train_pipeline + ) +) +dataset_A_val = dict( + ... + pipeline=test_pipeline +) +dataset_A_test = dict( + ... + pipeline=test_pipeline +) +dataset_B_train = dict( + type='RepeatDataset', + times=M, + dataset=dict( + type='Dataset_B', + ... + pipeline=train_pipeline + ) +) +data = dict( + imgs_per_gpu=2, + workers_per_gpu=2, + train = [ + dataset_A_train, + dataset_B_train + ], + val = dataset_A_val, + test = dataset_A_test +) + +``` + +### 多图混合集 + +我们使用 `MultiImageMixDataset` 作为包装(wrapper)去混合多个数据集的图片。 +`MultiImageMixDataset`可以被类似mosaic和mixup的多图混合数据増广使用。 + +`MultiImageMixDataset`与`Mosaic`数据増广一起使用的例子: + +```python +train_pipeline = [ + dict(type='RandomMosaic', prob=1), + dict(type='Resize', img_scale=(1024, 512), keep_ratio=True), + dict(type='RandomFlip', prob=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), +] + +train_dataset = dict( + type='MultiImageMixDataset', + dataset=dict( + classes=classes, + palette=palette, + type=dataset_type, + reduce_zero_label=False, + img_dir=data_root + "images/train", + ann_dir=data_root + "annotations/train", + pipeline=[ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + ] + ), + pipeline=train_pipeline +) + +``` diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/tutorials/customize_models.md b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/tutorials/customize_models.md new file mode 100644 index 0000000..c92d7db --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/tutorials/customize_models.md @@ -0,0 +1,230 @@ +# 教程 4: 自定义模型 + +## 自定义优化器 (optimizer) + +假设您想增加一个新的叫 `MyOptimizer` 的优化器,它的参数分别为 `a`, `b`, 和 `c`。 +您首先需要在一个文件里实现这个新的优化器,例如在 `mmseg/core/optimizer/my_optimizer.py` 里面: + +```python +from mmcv.runner import OPTIMIZERS +from torch.optim import Optimizer + + +@OPTIMIZERS.register_module +class MyOptimizer(Optimizer): + + def __init__(self, a, b, c) + +``` + +然后增加这个模块到 `mmseg/core/optimizer/__init__.py` 里面,这样注册器 (registry) 将会发现这个新的模块并添加它: + +```python +from .my_optimizer import MyOptimizer +``` + +之后您可以在配置文件的 `optimizer` 域里使用 `MyOptimizer`, +如下所示,在配置文件里,优化器被 `optimizer` 域所定义: + +```python +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +``` + +为了使用您自己的优化器,域可以被修改为: + +```python +optimizer = dict(type='MyOptimizer', a=a_value, b=b_value, c=c_value) +``` + +我们已经支持了 PyTorch 自带的全部优化器,唯一修改的地方是在配置文件里的 `optimizer` 域。例如,如果您想使用 `ADAM`,尽管数值表现会掉点,还是可以如下修改: + +```python +optimizer = dict(type='Adam', lr=0.0003, weight_decay=0.0001) +``` + +使用者可以直接按照 PyTorch [文档教程](https://pytorch.org/docs/stable/optim.html?highlight=optim#module-torch.optim) 去设置参数。 + +## 定制优化器的构造器 (optimizer constructor) + 
+对于优化,一些模型可能会有一些特别定义的参数,例如批归一化 (BatchNorm) 层里面的权重衰减 (weight decay)。 +使用者可以通过定制优化器的构造器来微调这些细粒度的优化器参数。 + +```python +from mmcv.utils import build_from_cfg + +from mmcv.runner import OPTIMIZER_BUILDERS +from .cocktail_optimizer import CocktailOptimizer + + +@OPTIMIZER_BUILDERS.register_module +class CocktailOptimizerConstructor(object): + + def __init__(self, optimizer_cfg, paramwise_cfg=None): + + def __call__(self, model): + + return my_optimizer + +``` + +## 开发和增加新的组件(Module) + +MMSegmentation 里主要有2种组件: + +- 主干网络 (backbone): 通常是卷积网络的堆叠,来做特征提取,例如 ResNet, HRNet +- 解码头 (decoder head): 用于语义分割图的解码的组件(得到分割结果) + +### 添加新的主干网络 + +这里我们以 MobileNet 为例,展示如何增加新的主干组件: + +1. 创建一个新的文件 `mmseg/models/backbones/mobilenet.py` + +```python +import torch.nn as nn + +from ..registry import BACKBONES + + +@BACKBONES.register_module +class MobileNet(nn.Module): + + def __init__(self, arg1, arg2): + pass + + def forward(self, x): # should return a tuple + pass + + def init_weights(self, pretrained=None): + pass +``` + +2. 在 `mmseg/models/backbones/__init__.py` 里面导入模块 + +```python +from .mobilenet import MobileNet +``` + +3. 在您的配置文件里使用它 + +```python +model = dict( + ... + backbone=dict( + type='MobileNet', + arg1=xxx, + arg2=xxx), + ... +``` + +### 增加新的解码头 (decoder head)组件 + +在 MMSegmentation 里面,对于所有的分割头,我们提供一个基类解码头 [BaseDecodeHead](https://github.com/open-mmlab/mmsegmentation/blob/master/mmseg/models/decode_heads/decode_head.py) 。 +所有新建的解码头都应该继承它。这里我们以 [PSPNet](https://arxiv.org/abs/1612.01105) 为例, +展示如何开发和增加一个新的解码头组件: + +首先,在 `mmseg/models/decode_heads/psp_head.py` 里添加一个新的解码头。 +PSPNet 中实现了一个语义分割的解码头。为了实现一个解码头,我们只需要在新构造的解码头中实现如下的3个函数: + +```python +@HEADS.register_module() +class PSPHead(BaseDecodeHead): + + def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): + super(PSPHead, self).__init__(**kwargs) + + def init_weights(self): + + def forward(self, inputs): + +``` + +接着,使用者需要在 `mmseg/models/decode_heads/__init__.py` 里面添加这个模块,这样对应的注册器 (registry) 可以查找并加载它们。 + +PSPNet的配置文件如下所示: + +```python +norm_cfg = dict(type='SyncBN', requires_grad=True) +model = dict( + type='EncoderDecoder', + pretrained='pretrain_model/resnet50_v1c_trick-2cccc1ad.pth', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='PSPHead', + in_channels=2048, + in_index=3, + channels=512, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))) + +``` + +### 增加新的损失函数 + +假设您想添加一个新的损失函数 `MyLoss` 到语义分割解码器里。 +为了添加一个新的损失函数,使用者需要在 `mmseg/models/losses/my_loss.py` 里面去实现它。 +`weighted_loss` 可以对计算损失时的每个样本做加权。 + +```python +import torch +import torch.nn as nn + +from ..builder import LOSSES +from .utils import weighted_loss + +@weighted_loss +def my_loss(pred, target): + assert pred.size() == target.size() and target.numel() > 0 + loss = torch.abs(pred - target) + return loss + +@LOSSES.register_module +class MyLoss(nn.Module): + + def __init__(self, reduction='mean', loss_weight=1.0): + super(MyLoss, self).__init__() + self.reduction = reduction + self.loss_weight = loss_weight + + def forward(self, + pred, + target, + weight=None, + avg_factor=None, + reduction_override=None): + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + 
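+        # 经 @weighted_loss 装饰后,my_loss 可以直接接收 weight、reduction
+        # 和 avg_factor 参数,并据此对逐元素损失做加权与归约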
loss = self.loss_weight * my_loss( + pred, target, weight, reduction=reduction, avg_factor=avg_factor) + return loss +``` + +然后使用者需要在 `mmseg/models/losses/__init__.py` 里面添加它: + +```python +from .my_loss import MyLoss, my_loss + +``` + +为了使用它,修改 `loss_xxx` 域。之后您需要在解码头组件里修改 `loss_decode` 域。 +`loss_weight` 可以被用来对不同的损失函数做加权。 + +```python +loss_decode=dict(type='MyLoss', loss_weight=1.0)) +``` diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/tutorials/customize_runtime.md b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/tutorials/customize_runtime.md new file mode 100644 index 0000000..9f791e1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/tutorials/customize_runtime.md @@ -0,0 +1,248 @@ +# 教程 6: 自定义运行设定 + +## 自定义优化设定 + +### 自定义 PyTorch 支持的优化器 + +我们已经支持 PyTorch 自带的所有优化器,唯一需要修改的地方是在配置文件里的 `optimizer` 域里面。 +例如,如果您想使用 `ADAM` (注意如下操作可能会让模型表现下降),可以使用如下修改: + +```python +optimizer = dict(type='Adam', lr=0.0003, weight_decay=0.0001) +``` + +为了修改模型的学习率,使用者仅需要修改配置文件里 optimizer 的 `lr` 即可。 +使用者可以参照 PyTorch 的 [API 文档](https://pytorch.org/docs/stable/optim.html?highlight=optim#module-torch.optim) +直接设置参数。 + +### 自定义自己实现的优化器 + +#### 1. 定义一个新的优化器 + +一个自定义的优化器可以按照如下去定义: + +假如您想增加一个叫做 `MyOptimizer` 的优化器,它的参数分别有 `a`, `b`, 和 `c`。 +您需要创建一个叫 `mmseg/core/optimizer` 的新文件夹。 +然后再在文件,即 `mmseg/core/optimizer/my_optimizer.py` 里面去实现这个新优化器: + +```python +from .registry import OPTIMIZERS +from torch.optim import Optimizer + + +@OPTIMIZERS.register_module() +class MyOptimizer(Optimizer): + + def __init__(self, a, b, c) + +``` + +#### 2. 增加优化器到注册表 (registry) + +为了让上述定义的模块被框架发现,首先这个模块应该被导入到主命名空间 (main namespace) 里。 +有两种方式可以实现它。 + +- 修改 `mmseg/core/optimizer/__init__.py` 来导入它 + + 新的被定义的模块应该被导入到 `mmseg/core/optimizer/__init__.py` 这样注册表将会发现新的模块并添加它 + +```python +from .my_optimizer import MyOptimizer +``` + +- 在配置文件里使用 `custom_imports` 去手动导入它 + +```python +custom_imports = dict(imports=['mmseg.core.optimizer.my_optimizer'], allow_failed_imports=False) +``` + +`mmseg.core.optimizer.my_optimizer` 模块将会在程序运行的开始被导入,并且 `MyOptimizer` 类将会自动注册。 +需要注意只有包含 `MyOptimizer` 类的包 (package) 应当被导入。 +而 `mmseg.core.optimizer.my_optimizer.MyOptimizer` **不能** 被直接导入。 + +事实上,使用者完全可以用另一个按这样导入方法的文件夹结构,只要模块的根路径已经被添加到 `PYTHONPATH` 里面。 + +#### 3. 
在配置文件里定义优化器 + +之后您可以在配置文件的 `optimizer` 域里面使用 `MyOptimizer` +在配置文件里,优化器被定义在 `optimizer` 域里,如下所示: + +```python +optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001) +``` + +为了使用您自己的优化器,这个域可以被改成: + +```python +optimizer = dict(type='MyOptimizer', a=a_value, b=b_value, c=c_value) +``` + +### 自定义优化器的构造器 (constructor) + +有些模型可能需要在优化器里有一些特别参数的设置,例如 批归一化层 (BatchNorm layers) 的 权重衰减 (weight decay)。 +使用者可以通过自定义优化器的构造器去微调这些细粒度参数。 + +```python +from mmcv.utils import build_from_cfg + +from mmcv.runner.optimizer import OPTIMIZER_BUILDERS, OPTIMIZERS +from mmseg.utils import get_root_logger +from .my_optimizer import MyOptimizer + + +@OPTIMIZER_BUILDERS.register_module() +class MyOptimizerConstructor(object): + + def __init__(self, optimizer_cfg, paramwise_cfg=None): + + def __call__(self, model): + + return my_optimizer + +``` + +默认的优化器构造器的实现可以参照 [这里](https://github.com/open-mmlab/mmcv/blob/9ecd6b0d5ff9d2172c49a182eaa669e9f27bb8e7/mmcv/runner/optimizer/default_constructor.py#L11) ,它也可以被用作新的优化器构造器的模板。 + +### 额外的设置 + +优化器没有实现的一些技巧应该通过优化器构造器 (optimizer constructor) 或者钩子 (hook) 去实现,如设置基于参数的学习率 (parameter-wise learning rates)。我们列出一些常见的设置,它们可以稳定或加速模型的训练。 +如果您有更多的设置,欢迎在 PR 和 issue 里面提交。 + +- __使用梯度截断 (gradient clip) 去稳定训练__: + + 一些模型需要梯度截断去稳定训练过程,如下所示 + + ```python + optimizer_config = dict( + _delete_=True, grad_clip=dict(max_norm=35, norm_type=2)) + ``` + + 如果您的配置继承自已经设置了 `optimizer_config` 的基础配置 (base config),您可能需要 `_delete_=True` 来重写那些不需要的设置。更多细节请参照 [配置文件文档](https://mmsegmentation.readthedocs.io/en/latest/config.html) 。 + +- __使用动量计划表 (momentum schedule) 去加速模型收敛__: + + 我们支持动量计划表去让模型基于学习率修改动量,这样可能让模型收敛地更快。 + 动量计划表经常和学习率计划表 (LR scheduler) 一起使用,例如如下配置文件就在 3D 检测里经常使用以加速收敛。 + 更多细节请参考 [CyclicLrUpdater](https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327) 和 [CyclicMomentumUpdater](https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130) 的实现。 + + ```python + lr_config = dict( + policy='cyclic', + target_ratio=(10, 1e-4), + cyclic_times=1, + step_ratio_up=0.4, + ) + momentum_config = dict( + policy='cyclic', + target_ratio=(0.85 / 0.95, 1), + cyclic_times=1, + step_ratio_up=0.4, + ) + ``` + +## 自定义训练计划表 + +我们根据默认的训练迭代步数 40k/80k 来设置学习率,这在 MMCV 里叫做 [`PolyLrUpdaterHook`](https://github.com/open-mmlab/mmcv/blob/826d3a7b68596c824fa1e2cb89b6ac274f52179c/mmcv/runner/hooks/lr_updater.py#L196) 。 +我们也支持许多其他的学习率计划表:[这里](https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/hooks/lr_updater.py) ,例如 `CosineAnnealing` 和 `Poly` 计划表。下面是一些例子: + +- 步计划表 Step schedule: + + ```python + lr_config = dict(policy='step', step=[9, 10]) + ``` + +- 余弦退火计划表 ConsineAnnealing schedule: + + ```python + lr_config = dict( + policy='CosineAnnealing', + warmup='linear', + warmup_iters=1000, + warmup_ratio=1.0 / 10, + min_lr_ratio=1e-5) + ``` + +## 自定义工作流 (workflow) + +工作流是一个专门定义运行顺序和轮数 (running order and epochs) 的列表 (phase, epochs)。 +默认情况下它设置成: + +```python +workflow = [('train', 1)] +``` + +意思是训练是跑 1 个 epoch。有时候使用者可能想检查模型在验证集上的一些指标(如 损失 loss,精确性 accuracy),我们可以这样设置工作流: + +```python +[('train', 1), ('val', 1)] +``` + +于是 1 个 epoch 训练,1 个 epoch 验证将交替运行。 + +**注意**: + +1. 模型的参数在验证的阶段不会被自动更新 +2. 配置文件里的关键词 `total_epochs` 仅控制训练的 epochs 数目,而不会影响验证时的工作流 +3. 
工作流 `[('train', 1), ('val', 1)]` 和 `[('train', 1)]` 将不会改变 `EvalHook` 的行为,因为 `EvalHook` 被 `after_train_epoch` + 调用而且验证的工作流仅仅影响通过调用 `after_val_epoch` 的钩子 (hooks)。因此, `[('train', 1), ('val', 1)]` 和 `[('train', 1)]` + 的区别仅在于 runner 将在每次训练 epoch 结束后计算在验证集上的损失 + +## 自定义钩 (hooks) + +### 使用 MMCV 实现的钩子 (hooks) + +如果钩子已经在 MMCV 里被实现,如下所示,您可以直接修改配置文件来使用钩子: + +```python +custom_hooks = [ + dict(type='MyHook', a=a_value, b=b_value, priority='NORMAL') +] +``` + +### 修改默认的运行时间钩子 (runtime hooks) + +以下的常用的钩子没有被 `custom_hooks` 注册: + +- log_config +- checkpoint_config +- evaluation +- lr_config +- optimizer_config +- momentum_config + +在这些钩子里,只有 logger hook 有 `VERY_LOW` 优先级,其他的优先级都是 `NORMAL`。 +上述提及的教程已经包括了如何修改 `optimizer_config`,`momentum_config` 和 `lr_config`。 +这里我们展示我们如何处理 `log_config`, `checkpoint_config` 和 `evaluation`。 + +#### 检查点配置文件 (Checkpoint config) + +MMCV runner 将使用 `checkpoint_config` 去初始化 [`CheckpointHook`](https://github.com/open-mmlab/mmcv/blob/9ecd6b0d5ff9d2172c49a182eaa669e9f27bb8e7/mmcv/runner/hooks/checkpoint.py#L9). + +```python +checkpoint_config = dict(interval=1) +``` + +使用者可以设置 `max_keep_ckpts` 来仅保存一小部分检查点或者通过 `save_optimizer` 来决定是否保存优化器的状态字典 (state dict of optimizer)。 更多使用参数的细节请参考 [这里](https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.CheckpointHook) 。 + +#### 日志配置文件 (Log config) + +`log_config` 包裹了许多日志钩 (logger hooks) 而且能去设置间隔 (intervals)。现在 MMCV 支持 `WandbLoggerHook`, `MlflowLoggerHook` 和 `TensorboardLoggerHook`。 +详细的使用请参照 [文档](https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook) 。 + +```python +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook') + ]) +``` + +#### 评估配置文件 (Evaluation config) + +`evaluation` 的配置文件将被用来初始化 [`EvalHook`](https://github.com/open-mmlab/mmsegmentation/blob/e3f6f655d69b777341aec2fe8829871cc0beadcb/mmseg/core/evaluation/eval_hooks.py#L7) 。 +除了 `interval` 键,其他的像 `metric` 这样的参数将被传递给 `dataset.evaluate()` 。 + +```python +evaluation = dict(interval=1, metric='mIoU') +``` diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/tutorials/data_pipeline.md b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/tutorials/data_pipeline.md new file mode 100644 index 0000000..119ae98 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/tutorials/data_pipeline.md @@ -0,0 +1,166 @@ +# 教程 3: 自定义数据流程 + +## 数据流程的设计 + +按照通常的惯例,我们使用 `Dataset` 和 `DataLoader` 做多线程的数据加载。`Dataset` 返回一个数据内容的字典,里面对应于模型前传方法的各个参数。 +因为在语义分割中,输入的图像数据具有不同的大小,我们在 MMCV 里引入一个新的 `DataContainer` 类别去帮助收集和分发不同大小的输入数据。 + +更多细节,请查看[这里](https://github.com/open-mmlab/mmcv/blob/master/mmcv/parallel/data_container.py) 。 + +数据的准备流程和数据集是解耦的。通常一个数据集定义了如何处理标注数据(annotations)信息,而一个数据流程定义了准备一个数据字典的所有步骤。一个流程包括了一系列操作,每个操作里都把一个字典作为输入,然后再输出一个新的字典给下一个变换操作。 + +这些操作可分为数据加载 (data loading),预处理 (pre-processing),格式变化 (formatting) 和测试时数据增强 (test-time augmentation)。 + +下面的例子就是 PSPNet 的一个流程: + +```python +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +crop_size = (512, 1024) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 
'gt_semantic_seg']), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(2048, 1024), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] +``` + +对于每个操作,我们列出它添加、更新、移除的相关字典域 (dict fields): + +### 数据加载 Data loading + +`LoadImageFromFile` + +- 增加: img, img_shape, ori_shape + +`LoadAnnotations` + +- 增加: gt_semantic_seg, seg_fields + +### 预处理 Pre-processing + +`Resize` + +- 增加: scale, scale_idx, pad_shape, scale_factor, keep_ratio +- 更新: img, img_shape, \*seg_fields + +`RandomFlip` + +- 增加: flip +- 更新: img, \*seg_fields + +`Pad` + +- 增加: pad_fixed_size, pad_size_divisor +- 更新: img, pad_shape, \*seg_fields + +`RandomCrop` + +- 更新: img, pad_shape, \*seg_fields + +`Normalize` + +- 增加: img_norm_cfg +- 更新: img + +`SegRescale` + +- 更新: gt_semantic_seg + +`PhotoMetricDistortion` + +- 更新: img + +### 格式 Formatting + +`ToTensor` + +- 更新: 由 `keys` 指定 + +`ImageToTensor` + +- 更新: 由 `keys` 指定 + +`Transpose` + +- 更新: 由 `keys` 指定 + +`ToDataContainer` + +- 更新: 由 `keys` 指定 + +`DefaultFormatBundle` + +- 更新: img, gt_semantic_seg + +`Collect` + +- 增加: img_meta (the keys of img_meta is specified by `meta_keys`) +- 移除: all other keys except for those specified by `keys` + +### 测试时数据增强 Test time augmentation + +`MultiScaleFlipAug` + +## 拓展和使用自定义的流程 + +1. 在任何一个文件里写一个新的流程,例如 `my_pipeline.py`,它以一个字典作为输入并且输出一个字典 + + ```python + from mmseg.datasets import PIPELINES + + @PIPELINES.register_module() + class MyTransform: + + def __call__(self, results): + results['dummy'] = True + return results + ``` + +2. 导入一个新类 + + ```python + from .my_pipeline import MyTransform + ``` + +3. 在配置文件里使用它 + + ```python + img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + crop_size = (512, 1024) + train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(2048, 1024), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='MyTransform'), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), + ] + ``` diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/tutorials/index.rst b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/tutorials/index.rst new file mode 100644 index 0000000..e1a67a8 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/tutorials/index.rst @@ -0,0 +1,9 @@ +.. 
toctree:: + :maxdepth: 2 + + config.md + customize_datasets.md + data_pipeline.md + customize_models.md + training_tricks.md + customize_runtime.md diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/tutorials/training_tricks.md b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/tutorials/training_tricks.md new file mode 100644 index 0000000..f67759a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/tutorials/training_tricks.md @@ -0,0 +1,95 @@ +# 教程 5: 训练技巧 + +MMSegmentation 支持如下训练技巧: + +## 主干网络和解码头组件使用不同的学习率 (Learning Rate, LR) + +在语义分割里,一些方法会让解码头组件的学习率大于主干网络的学习率,这样可以获得更好的表现或更快的收敛。 + +在 MMSegmentation 里面,您也可以在配置文件里添加如下行来让解码头组件的学习率是主干组件的10倍。 + +```python +optimizer=dict( + paramwise_cfg = dict( + custom_keys={ + 'head': dict(lr_mult=10.)})) +``` + +通过这种修改,任何被分组到 `'head'` 的参数的学习率都将乘以10。您也可以参照 [MMCV 文档](https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.DefaultOptimizerConstructor) 获取更详细的信息。 + +## 在线难样本挖掘 (Online Hard Example Mining, OHEM) + +对于训练时采样,我们在 [这里](https://github.com/open-mmlab/mmsegmentation/tree/master/mmseg/core/seg/sampler) 做了像素采样器。 +如下例子是使用 PSPNet 训练并采用 OHEM 策略的配置: + +```python +_base_ = './pspnet_r50-d8_512x1024_40k_cityscapes.py' +model=dict( + decode_head=dict( + sampler=dict(type='OHEMPixelSampler', thresh=0.7, min_kept=100000)) ) +``` + +通过这种方式,只有置信分数在0.7以下的像素值点会被拿来训练。在训练时我们至少要保留100000个像素值点。如果 `thresh` 并未被指定,前 `min_kept` +个损失的像素值点才会被选择。 + +## 类别平衡损失 (Class Balanced Loss) + +对于不平衡类别分布的数据集,您也许可以改变每个类别的损失权重。这里以 cityscapes 数据集为例: + +```python +_base_ = './pspnet_r50-d8_512x1024_40k_cityscapes.py' +model=dict( + decode_head=dict( + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0, + # DeepLab 对 cityscapes 使用这种权重 + class_weight=[0.8373, 0.9180, 0.8660, 1.0345, 1.0166, 0.9969, 0.9754, + 1.0489, 0.8786, 1.0023, 0.9539, 0.9843, 1.1116, 0.9037, + 1.0865, 1.0955, 1.0865, 1.1529, 1.0507]))) +``` + +`class_weight` 将被作为 `weight` 参数,传递给 `CrossEntropyLoss`。详细信息请参照 [PyTorch 文档](https://pytorch.org/docs/stable/nn.html?highlight=crossentropy#torch.nn.CrossEntropyLoss) 。 + +## 同时使用多种损失函数 (Multiple Losses) + +对于训练时损失函数的计算,我们目前支持多个损失函数同时使用。 以 `unet` 使用 `DRIVE` 数据集训练为例, +使用 `CrossEntropyLoss` 和 `DiceLoss` 的 `1:3` 的加权和作为损失函数。配置文件写为: + +```python +_base_ = './fcn_unet_s5-d16_64x64_40k_drive.py' +model = dict( + decode_head=dict(loss_decode=[dict(type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0)]), + auxiliary_head=dict(loss_decode=[dict(type='CrossEntropyLoss', loss_name='loss_ce',loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=3.0)]), + ) +``` + +通过这种方式,确定训练过程中损失函数的权重 `loss_weight` 和在训练日志里的名字 `loss_name`。 + +注意: `loss_name` 的名字必须带有 `loss_` 前缀,这样它才能被包括在反传的图里。 + +## 在损失函数中忽略特定的 label 类别 + +默认设置 `avg_non_ignore=False`, 即每个像素都用来计算损失函数。尽管其中的一些像素属于需要被忽略的类别。 + +对于训练时损失函数的计算,我们目前支持使用 `avg_non_ignore` 和 `ignore_index` 来忽略 label 特定的类别。 这样损失函数将只在非忽略类别像素中求平均值,会获得更好的表现。这里是[相关 PR](https://github.com/open-mmlab/mmsegmentation/pull/1409)。以 `unet` 使用 `Cityscapes` 数据集训练为例, +在计算损失函数时,忽略 label 为0的背景,并且仅在不被忽略的像素上计算均值。配置文件写为: + +```python +_base_ = './fcn_unet_s5-d16_4x4_512x1024_160k_cityscapes.py' +model = dict( + decode_head=dict( + ignore_index=0, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0, avg_non_ignore=True), + auxiliary_head=dict( + ignore_index=0, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0, avg_non_ignore=True)), + )) +``` + 
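+下面给出一个独立的最小示例(仅为示意,直接用 PyTorch 的 `F.cross_entropy` 演示 `avg_non_ignore` 的语义,并非 MMSegmentation 的内部实现;其中张量形状和忽略类别 0 均为假设):
+
+```python
+import torch
+import torch.nn.functional as F
+
+logits = torch.randn(1, 3, 4, 4)          # (N, C, H, W) 的预测
+target = torch.randint(0, 3, (1, 4, 4))   # (N, H, W) 的标签
+target[0, 0, :] = 0                       # 假设类别 0 为被忽略的 label
+
+# 逐像素损失,被忽略位置的损失为 0
+loss_map = F.cross_entropy(logits, target, ignore_index=0, reduction='none')
+valid = (target != 0)
+
+loss_non_ignore = loss_map[valid].mean()    # 只在非忽略像素上求平均,对应 avg_non_ignore=True
+loss_all = loss_map.sum() / target.numel()  # 在全部像素上求平均,对应 avg_non_ignore=False
+```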
+通过这种方式,确定训练过程中损失函数的权重 `loss_weight` 和在训练日志里的名字 `loss_name`。 + +注意: `loss_name` 的名字必须带有 `loss_` 前缀,这样它才能被包括在反传的图里。 diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/useful_tools.md b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/useful_tools.md new file mode 100644 index 0000000..d6a4986 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/docs/zh_cn/useful_tools.md @@ -0,0 +1,368 @@ +## 常用工具 + +除了训练和测试的脚本,我们在 `tools/` 文件夹路径下还提供许多有用的工具。 + +### 计算参数量(params)和计算量( FLOPs) (试验性) + +我们基于 [flops-counter.pytorch](https://github.com/sovrasov/flops-counter.pytorch) +提供了一个用于计算给定模型参数量和计算量的脚本。 + +```shell +python tools/get_flops.py ${CONFIG_FILE} [--shape ${INPUT_SHAPE}] +``` + +您将得到如下的结果: + +```none +============================== +Input shape: (3, 2048, 1024) +Flops: 1429.68 GMac +Params: 48.98 M +============================== +``` + +**注意**: 这个工具仍然是试验性的,我们无法保证数字是正确的。您可以拿这些结果做简单的实验的对照,在写技术文档报告或者论文前您需要再次确认一下。 + +(1) 计算量与输入的形状有关,而参数量与输入的形状无关,默认的输入形状是 (1, 3, 1280, 800); +(2) 一些运算操作,如 GN 和其他定制的运算操作没有加入到计算量的计算中。 + +### 发布模型 + +在您上传一个模型到云服务器之前,您需要做以下几步: +(1) 将模型权重转成 CPU 张量; +(2) 删除记录优化器状态 (optimizer states)的相关信息; +(3) 计算检查点文件 (checkpoint file) 的哈希编码(hash id)并且将哈希编码加到文件名中。 + +```shell +python tools/publish_model.py ${INPUT_FILENAME} ${OUTPUT_FILENAME} +``` + +例如, + +```shell +python tools/publish_model.py work_dirs/pspnet/latest.pth psp_r50_hszhao_200ep.pth +``` + +最终输出文件将是 `psp_r50_512x1024_40ki_cityscapes-{hash id}.pth`。 + +### 导出 ONNX (试验性) + +我们提供了一个脚本来导出模型到 [ONNX](https://github.com/onnx/onnx) 格式。被转换的模型可以通过工具 [Netron](https://github.com/lutzroeder/netron) +来可视化。除此以外,我们同样支持对 PyTorch 和 ONNX 模型的输出结果做对比。 + +```bash +python tools/pytorch2onnx.py \ + ${CONFIG_FILE} \ + --checkpoint ${CHECKPOINT_FILE} \ + --output-file ${ONNX_FILE} \ + --input-img ${INPUT_IMG} \ + --shape ${INPUT_SHAPE} \ + --rescale-shape ${RESCALE_SHAPE} \ + --show \ + --verify \ + --dynamic-export \ + --cfg-options \ + model.test_cfg.mode="whole" +``` + +各个参数的描述: + +- `config` : 模型配置文件的路径 +- `--checkpoint` : 模型检查点文件的路径 +- `--output-file`: 输出的 ONNX 模型的路径。如果没有专门指定,它默认是 `tmp.onnx` +- `--input-img` : 用来转换和可视化的一张输入图像的路径 +- `--shape`: 模型的输入张量的高和宽。如果没有专门指定,它将被设置成 `test_pipeline` 的 `img_scale` +- `--rescale-shape`: 改变输出的形状。设置这个值来避免 OOM,它仅在 `slide` 模式下可以用 +- `--show`: 是否打印输出模型的结构。如果没有被专门指定,它将被设置成 `False` +- `--verify`: 是否验证一个输出模型的正确性 (correctness)。如果没有被专门指定,它将被设置成 `False` +- `--dynamic-export`: 是否导出形状变化的输入与输出的 ONNX 模型。如果没有被专门指定,它将被设置成 `False` +- `--cfg-options`: 更新配置选项 + +**注意**: 这个工具仍然是试验性的,目前一些自定义操作还没有被支持 + +### 评估 ONNX 模型 + +我们提供 `tools/deploy_test.py` 去评估不同后端的 ONNX 模型。 + +#### 先决条件 + +- 安装 onnx 和 onnxruntime-gpu + + ```shell + pip install onnx onnxruntime-gpu + ``` + +- 参考 [如何在 MMCV 里构建 tensorrt 插件](https://mmcv.readthedocs.io/en/latest/tensorrt_plugin.html#how-to-build-tensorrt-plugins-in-mmcv) 安装TensorRT (可选) + +#### 使用方法 + +```bash +python tools/deploy_test.py \ + ${CONFIG_FILE} \ + ${MODEL_FILE} \ + ${BACKEND} \ + --out ${OUTPUT_FILE} \ + --eval ${EVALUATION_METRICS} \ + --show \ + --show-dir ${SHOW_DIRECTORY} \ + --cfg-options ${CFG_OPTIONS} \ + --eval-options ${EVALUATION_OPTIONS} \ + --opacity ${OPACITY} \ +``` + +各个参数的描述: + +- `config`: 模型配置文件的路径 +- `model`: 被转换的模型文件的路径 +- `backend`: 推理的后端,可选项:`onnxruntime`, `tensorrt` +- `--out`: 输出结果成 pickle 格式文件的路径 +- `--format-only` : 不评估直接给输出结果的格式。通常用在当您想把结果输出成一些测试服务器需要的特定格式时。如果没有被专门指定,它将被设置成 `False`。 注意这个参数是用 `--eval` 来 **手动添加** +- `--eval`: 评估指标,取决于每个数据集的要求,例如 "mIoU" 是大多数据集的指标而 "cityscapes" 仅针对 Cityscapes 数据集。注意这个参数是用 `--format-only` 来 **手动添加** +- 
`--show`: 是否展示结果 +- `--show-dir`: 涂上结果的图像被保存的文件夹的路径 +- `--cfg-options`: 重写配置文件里的一些设置,`xxx=yyy` 格式的键值对将被覆盖到配置文件里 +- `--eval-options`: 自定义的评估的选项, `xxx=yyy` 格式的键值对将成为 `dataset.evaluate()` 函数的参数变量 +- `--opacity`: 涂上结果的分割图的透明度,范围在 (0, 1\] 之间 + +#### 结果和模型 + +| 模型 | 配置文件 | 数据集 | 评价指标 | PyTorch | ONNXRuntime | TensorRT-fp32 | TensorRT-fp16 | +| :--------: | :---------------------------------------------: | :--------: | :------: | :-----: | :---------: | :-----------: | :-----------: | +| FCN | fcn_r50-d8_512x1024_40k_cityscapes.py | cityscapes | mIoU | 72.2 | 72.2 | 72.2 | 72.2 | +| PSPNet | pspnet_r50-d8_512x1024_40k_cityscapes.py | cityscapes | mIoU | 77.8 | 77.8 | 77.8 | 77.8 | +| deeplabv3 | deeplabv3_r50-d8_512x1024_40k_cityscapes.py | cityscapes | mIoU | 79.0 | 79.0 | 79.0 | 79.0 | +| deeplabv3+ | deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py | cityscapes | mIoU | 79.6 | 79.5 | 79.5 | 79.5 | +| PSPNet | pspnet_r50-d8_769x769_40k_cityscapes.py | cityscapes | mIoU | 78.2 | 78.1 | | | +| deeplabv3 | deeplabv3_r50-d8_769x769_40k_cityscapes.py | cityscapes | mIoU | 78.5 | 78.3 | | | +| deeplabv3+ | deeplabv3plus_r50-d8_769x769_40k_cityscapes.py | cityscapes | mIoU | 78.9 | 78.7 | | | + +**注意**: TensorRT 仅在使用 `whole mode` 测试模式时的配置文件里可用。 + +### 导出 TorchScript (试验性) + +我们同样提供一个脚本去把模型导出成 [TorchScript](https://pytorch.org/docs/stable/jit.html) 格式。您可以使用 pytorch C++ API [LibTorch](https://pytorch.org/docs/stable/cpp_index.html) 去推理训练好的模型。 +被转换的模型能被像 [Netron](https://github.com/lutzroeder/netron) 的工具来可视化。此外,我们还支持 PyTorch 和 TorchScript 模型的输出结果的比较。 + +```shell +python tools/pytorch2torchscript.py \ + ${CONFIG_FILE} \ + --checkpoint ${CHECKPOINT_FILE} \ + --output-file ${ONNX_FILE} + --shape ${INPUT_SHAPE} + --verify \ + --show +``` + +各个参数的描述: + +- `config` : pytorch 模型的配置文件的路径 +- `--checkpoint` : pytorch 模型的检查点文件的路径 +- `--output-file`: TorchScript 模型输出的路径,如果没有被专门指定,它将被设置成 `tmp.pt` +- `--input-img` : 用来转换和可视化的输入图像的路径 +- `--shape`: 模型的输入张量的宽和高。如果没有被专门指定,它将被设置成 `512 512` +- `--show`: 是否打印输出模型的追踪图 (traced graph),如果没有被专门指定,它将被设置成 `False` +- `--verify`: 是否验证一个输出模型的正确性 (correctness),如果没有被专门指定,它将被设置成 `False` + +**注意**: 目前仅支持 PyTorch>=1.8.0 版本 + +**注意**: 这个工具仍然是试验性的,一些自定义操作符目前还不被支持 + +例子: + +- 导出 PSPNet 在 cityscapes 数据集上的 pytorch 模型 + + ```shell + python tools/pytorch2torchscript.py configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py \ + --checkpoint checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth \ + --output-file checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pt \ + --shape 512 1024 + ``` + +### 导出 TensorRT (试验性) + +一个导出 [ONNX](https://github.com/onnx/onnx) 模型成 [TensorRT](https://developer.nvidia.com/tensorrt) 格式的脚本 + +先决条件 + +- 按照 [ONNXRuntime in mmcv](https://mmcv.readthedocs.io/en/latest/deployment/onnxruntime_op.html) 和 [TensorRT plugin in mmcv](https://github.com/open-mmlab/mmcv/blob/master/docs/en/deployment/tensorrt_plugin.md) ,用 ONNXRuntime 自定义运算 (custom ops) 和 TensorRT 插件安装 `mmcv-full` +- 使用 [pytorch2onnx](#convert-to-onnx-experimental) 将模型从 PyTorch 转成 ONNX + +使用方法 + +```bash +python ${MMSEG_PATH}/tools/onnx2tensorrt.py \ + ${CFG_PATH} \ + ${ONNX_PATH} \ + --trt-file ${OUTPUT_TRT_PATH} \ + --min-shape ${MIN_SHAPE} \ + --max-shape ${MAX_SHAPE} \ + --input-img ${INPUT_IMG} \ + --show \ + --verify +``` + +各个参数的描述: + +- `config` : 模型的配置文件 +- `model` : 输入的 ONNX 模型的路径 +- `--trt-file` : 输出的 TensorRT 引擎的路径 +- `--max-shape` : 模型的输入的最大形状 +- `--min-shape` : 模型的输入的最小形状 +- `--fp16` : 做 fp16 模型转换 +- `--workspace-size` : 在 GiB 里的最大工作空间大小 (Max workspace 
size) +- `--input-img` : 用来可视化的图像 +- `--show` : 做结果的可视化 +- `--dataset` : Palette provider, 默认为 `CityscapesDataset` +- `--verify` : 验证 ONNXRuntime 和 TensorRT 的输出 +- `--verbose` : 当创建 TensorRT 引擎时,是否详细做信息日志。默认为 False + +**注意**: 仅在全图测试模式 (whole mode) 下测试过 + +## 其他内容 + +### 打印完整的配置文件 + +`tools/print_config.py` 会逐字逐句的打印整个配置文件,展开所有的导入。 + +```shell +python tools/print_config.py \ + ${CONFIG} \ + --graph \ + --cfg-options ${OPTIONS [OPTIONS...]} \ +``` + +各个参数的描述: + +- `config` : pytorch 模型的配置文件的路径 +- `--graph` : 是否打印模型的图 (models graph) +- `--cfg-options`: 自定义替换配置文件的选项 + +### 对训练日志 (training logs) 画图 + +`tools/analyze_logs.py` 会画出给定的训练日志文件的 loss/mIoU 曲线,首先需要 `pip install seaborn` 安装依赖包。 + +```shell +python tools/analyze_logs.py xxx.log.json [--keys ${KEYS}] [--legend ${LEGEND}] [--backend ${BACKEND}] [--style ${STYLE}] [--out ${OUT_FILE}] +``` + +示例: + +- 对 mIoU, mAcc, aAcc 指标画图 + + ```shell + python tools/analyze_logs.py log.json --keys mIoU mAcc aAcc --legend mIoU mAcc aAcc + ``` + +- 对 loss 指标画图 + + ```shell + python tools/analyze_logs.py log.json --keys loss --legend loss + ``` + +### 转换其他仓库的权重 + +`tools/model_converters/` 提供了若干个预训练权重转换脚本,支持将其他仓库的预训练权重的 key 转换为与 MMSegmentation 相匹配的 key。 + +#### ViT Swin MiT Transformer 模型 + +- ViT + +`tools/model_converters/vit2mmseg.py` 将 timm 预训练模型转换到 MMSegmentation。 + +```shell +python tools/model_converters/vit2mmseg.py ${SRC} ${DST} +``` + +- Swin + + `tools/model_converters/swin2mmseg.py` 将官方预训练模型转换到 MMSegmentation。 + + ```shell + python tools/model_converters/swin2mmseg.py ${SRC} ${DST} + ``` + +- SegFormer + + `tools/model_converters/mit2mmseg.py` 将官方预训练模型转换到 MMSegmentation。 + + ```shell + python tools/model_converters/mit2mmseg.py ${SRC} ${DST} + ``` + +## 模型服务 + +为了用 [`TorchServe`](https://pytorch.org/serve/) 服务 `MMSegmentation` 的模型 , 您可以遵循如下流程: + +### 1. 将 model 从 MMSegmentation 转换到 TorchServe + +```shell +python tools/mmseg2torchserve.py ${CONFIG_FILE} ${CHECKPOINT_FILE} \ +--output-folder ${MODEL_STORE} \ +--model-name ${MODEL_NAME} +``` + +**注意**: ${MODEL_STORE} 需要设置为某个文件夹的绝对路径 + +### 2. 构建 `mmseg-serve` 容器镜像 (docker image) + +```shell +docker build -t mmseg-serve:latest docker/serve/ +``` + +### 3. 运行 `mmseg-serve` + +请查阅官方文档: [使用容器运行 TorchServe](https://github.com/pytorch/serve/blob/master/docker/README.md#running-torchserve-in-a-production-docker-environment) + +为了在 GPU 环境下使用, 您需要安装 [nvidia-docker](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). 若在 CPU 环境下使用,您可以忽略添加 `--gpus` 参数。 + +示例: + +```shell +docker run --rm \ +--cpus 8 \ +--gpus device=0 \ +-p8080:8080 -p8081:8081 -p8082:8082 \ +--mount type=bind,source=$MODEL_STORE,target=/home/model-server/model-store \ +mmseg-serve:latest +``` + +阅读关于推理 (8080), 管理 (8081) 和指标 (8082) APIs 的 [文档](https://github.com/pytorch/serve/blob/072f5d088cce9bb64b2a18af065886c9b01b317b/docs/rest_api.md) 。 + +### 4. 测试部署 + +```shell +curl -O https://raw.githubusercontent.com/open-mmlab/mmsegmentation/master/resources/3dogs.jpg +curl http://127.0.0.1:8080/predictions/${MODEL_NAME} -T 3dogs.jpg -o 3dogs_mask.png +``` + +得到的响应将是一个 ".png" 的分割掩码. 
+ +您可以按照如下方法可视化输出: + +```python +import matplotlib.pyplot as plt +import mmcv +plt.imshow(mmcv.imread("3dogs_mask.png", "grayscale")) +plt.show() +``` + +看到的东西将会和下图类似: + +![3dogs_mask](../../resources/3dogs_mask.png) + +然后您可以使用 `test_torchserve.py` 比较 torchserve 和 pytorch 的结果,并将它们可视化。 + +```shell +python tools/torchserve/test_torchserve.py ${IMAGE_FILE} ${CONFIG_FILE} ${CHECKPOINT_FILE} ${MODEL_NAME} +[--inference-addr ${INFERENCE_ADDR}] [--result-image ${RESULT_IMAGE}] [--device ${DEVICE}] +``` + +示例: + +```shell +python tools/torchserve/test_torchserve.py \ +demo/demo.png \ +configs/fcn/fcn_r50-d8_512x1024_40k_cityscapes.py \ +checkpoint/fcn_r50-d8_512x1024_40k_cityscapes_20200604_192608-efe53f0d.pth \ +fcn +``` diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/__init__.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/__init__.py new file mode 100644 index 0000000..360abfc --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/__init__.py @@ -0,0 +1,62 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings + +import mmcv +from packaging.version import parse + +from .version import __version__, version_info + +MMCV_MIN = '1.3.13' +MMCV_MAX = '1.6.0' + + +def digit_version(version_str: str, length: int = 4): + """Convert a version string into a tuple of integers. + + This method is usually used for comparing two versions. For pre-release + versions: alpha < beta < rc. + + Args: + version_str (str): The version string. + length (int): The maximum number of version levels. Default: 4. + + Returns: + tuple[int]: The version info in digits (integers). + """ + version = parse(version_str) + assert version.release, f'failed to parse version {version_str}' + release = list(version.release) + release = release[:length] + if len(release) < length: + release = release + [0] * (length - len(release)) + if version.is_prerelease: + mapping = {'a': -3, 'b': -2, 'rc': -1} + val = -4 + # version.pre can be None + if version.pre: + if version.pre[0] not in mapping: + warnings.warn(f'unknown prerelease version {version.pre[0]}, ' + 'version checking may go wrong') + else: + val = mapping[version.pre[0]] + release.extend([val, version.pre[-1]]) + else: + release.extend([val, 0]) + + elif version.is_postrelease: + release.extend([1, version.post]) + else: + release.extend([0, 0]) + return tuple(release) + + +mmcv_min_version = digit_version(MMCV_MIN) +mmcv_max_version = digit_version(MMCV_MAX) +mmcv_version = digit_version(mmcv.__version__) + + +assert (mmcv_min_version <= mmcv_version <= mmcv_max_version), \ + f'MMCV=={mmcv.__version__} is used but incompatible. ' \ + f'Please install mmcv>={mmcv_min_version}, <={mmcv_max_version}.' + +__all__ = ['__version__', 'version_info', 'digit_version'] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/apis/__init__.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/apis/__init__.py new file mode 100644 index 0000000..c688180 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/apis/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from .inference import inference_segmentor, init_segmentor, show_result_pyplot +from .test import multi_gpu_test, single_gpu_test +from .train import (get_root_logger, init_random_seed, set_random_seed, + train_segmentor) + +__all__ = [ + 'get_root_logger', 'set_random_seed', 'train_segmentor', 'init_segmentor', + 'inference_segmentor', 'multi_gpu_test', 'single_gpu_test', + 'show_result_pyplot', 'init_random_seed' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/apis/inference.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/apis/inference.py new file mode 100644 index 0000000..a2a8ab0 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/apis/inference.py @@ -0,0 +1,141 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import matplotlib.pyplot as plt +import mmcv +import torch +from mmcv.parallel import collate, scatter +from mmcv.runner import load_checkpoint + +from mmseg.datasets.pipelines import Compose +from mmseg.models import build_segmentor + + +def init_segmentor(config, checkpoint=None, device='cuda:0'): + """Initialize a segmentor from config file. + + Args: + config (str or :obj:`mmcv.Config`): Config file path or the config + object. + checkpoint (str, optional): Checkpoint path. If left as None, the model + will not load any weights. + device (str, optional) CPU/CUDA device option. Default 'cuda:0'. + Use 'cpu' for loading model on CPU. + Returns: + nn.Module: The constructed segmentor. + """ + if isinstance(config, str): + config = mmcv.Config.fromfile(config) + elif not isinstance(config, mmcv.Config): + raise TypeError('config must be a filename or Config object, ' + 'but got {}'.format(type(config))) + config.model.pretrained = None + config.model.train_cfg = None + model = build_segmentor(config.model, test_cfg=config.get('test_cfg')) + if checkpoint is not None: + checkpoint = load_checkpoint(model, checkpoint, map_location='cpu') + model.CLASSES = checkpoint['meta']['CLASSES'] + model.PALETTE = checkpoint['meta']['PALETTE'] + model.cfg = config # save the config in the model for convenience + model.to(device) + model.eval() + return model + + +class LoadImage: + """A simple pipeline to load image.""" + + def __call__(self, results): + """Call function to load images into results. + + Args: + results (dict): A result dict contains the file name + of the image to be read. + + Returns: + dict: ``results`` will be returned containing loaded image. + """ + + if isinstance(results['img'], str): + results['filename'] = results['img'] + results['ori_filename'] = results['img'] + else: + results['filename'] = None + results['ori_filename'] = None + img = mmcv.imread(results['img']) + results['img'] = img + results['img_shape'] = img.shape + results['ori_shape'] = img.shape + return results + + +def inference_segmentor(model, img): + """Inference image(s) with the segmentor. + + Args: + model (nn.Module): The loaded segmentor. + imgs (str/ndarray or list[str/ndarray]): Either image files or loaded + images. + + Returns: + (list[Tensor]): The segmentation result. 
+ """ + cfg = model.cfg + device = next(model.parameters()).device # model device + # build the data pipeline + test_pipeline = [LoadImage()] + cfg.data.test.pipeline[1:] + test_pipeline = Compose(test_pipeline) + # prepare data + data = dict(img=img) + data = test_pipeline(data) + data = collate([data], samples_per_gpu=1) + if next(model.parameters()).is_cuda: + # scatter to specified GPU + data = scatter(data, [device])[0] + else: + data['img_metas'] = [i.data[0] for i in data['img_metas']] + + # forward the model + with torch.no_grad(): + result = model(return_loss=False, rescale=True, **data) + return result + + +def show_result_pyplot(model, + img, + result, + palette=None, + fig_size=(15, 10), + opacity=0.5, + title='', + block=True, + out_file=None): + """Visualize the segmentation results on the image. + + Args: + model (nn.Module): The loaded segmentor. + img (str or np.ndarray): Image filename or loaded image. + result (list): The segmentation result. + palette (list[list[int]]] | None): The palette of segmentation + map. If None is given, random palette will be generated. + Default: None + fig_size (tuple): Figure size of the pyplot figure. + opacity(float): Opacity of painted segmentation map. + Default 0.5. + Must be in (0, 1] range. + title (str): The title of pyplot figure. + Default is ''. + block (bool): Whether to block the pyplot figure. + Default is True. + out_file (str or None): The path to write the image. + Default: None. + """ + if hasattr(model, 'module'): + model = model.module + img = model.show_result( + img, result, palette=palette, show=False, opacity=opacity) + plt.figure(figsize=fig_size) + plt.imshow(mmcv.bgr2rgb(img)) + plt.title(title) + plt.tight_layout() + plt.show(block=block) + if out_file is not None: + mmcv.imwrite(img, out_file) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/apis/test.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/apis/test.py new file mode 100644 index 0000000..c263237 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/apis/test.py @@ -0,0 +1,234 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +import tempfile +import warnings + +import mmcv +import numpy as np +import torch +from mmcv.engine import collect_results_cpu, collect_results_gpu +from mmcv.image import tensor2imgs +from mmcv.runner import get_dist_info + + +def np2tmp(array, temp_file_name=None, tmpdir=None): + """Save ndarray to local numpy file. + + Args: + array (ndarray): Ndarray to save. + temp_file_name (str): Numpy file name. If 'temp_file_name=None', this + function will generate a file name with tempfile.NamedTemporaryFile + to save ndarray. Default: None. + tmpdir (str): Temporary directory to save Ndarray files. Default: None. + Returns: + str: The numpy file name. + """ + + if temp_file_name is None: + temp_file_name = tempfile.NamedTemporaryFile( + suffix='.npy', delete=False, dir=tmpdir).name + np.save(temp_file_name, array) + return temp_file_name + + +def single_gpu_test(model, + data_loader, + show=False, + out_dir=None, + efficient_test=False, + opacity=0.5, + pre_eval=False, + format_only=False, + format_args={}): + """Test with single GPU by progressive mode. + + Args: + model (nn.Module): Model to be tested. + data_loader (utils.data.Dataloader): Pytorch data loader. + show (bool): Whether show results during inference. Default: False. + out_dir (str, optional): If specified, the results will be dumped into + the directory to save output results. 
+ efficient_test (bool): Whether save the results as local numpy files to + save CPU memory during evaluation. Mutually exclusive with + pre_eval and format_results. Default: False. + opacity(float): Opacity of painted segmentation map. + Default 0.5. + Must be in (0, 1] range. + pre_eval (bool): Use dataset.pre_eval() function to generate + pre_results for metric evaluation. Mutually exclusive with + efficient_test and format_results. Default: False. + format_only (bool): Only format result for results commit. + Mutually exclusive with pre_eval and efficient_test. + Default: False. + format_args (dict): The args for format_results. Default: {}. + Returns: + list: list of evaluation pre-results or list of save file names. + """ + if efficient_test: + warnings.warn( + 'DeprecationWarning: ``efficient_test`` will be deprecated, the ' + 'evaluation is CPU memory friendly with pre_eval=True') + mmcv.mkdir_or_exist('.efficient_test') + # when none of them is set true, return segmentation results as + # a list of np.array. + assert [efficient_test, pre_eval, format_only].count(True) <= 1, \ + '``efficient_test``, ``pre_eval`` and ``format_only`` are mutually ' \ + 'exclusive, only one of them could be true .' + + model.eval() + results = [] + dataset = data_loader.dataset + prog_bar = mmcv.ProgressBar(len(dataset)) + # The pipeline about how the data_loader retrieval samples from dataset: + # sampler -> batch_sampler -> indices + # The indices are passed to dataset_fetcher to get data from dataset. + # data_fetcher -> collate_fn(dataset[index]) -> data_sample + # we use batch_sampler to get correct data idx + loader_indices = data_loader.batch_sampler + + for batch_indices, data in zip(loader_indices, data_loader): + with torch.no_grad(): + result = model(return_loss=False, **data) + + if show or out_dir: + img_tensor = data['img'][0] + img_metas = data['img_metas'][0].data[0] + #print(img_metas) + imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg']) + assert len(imgs) == len(img_metas) + + for img, img_meta in zip(imgs, img_metas): + h, w, _ = img_meta['img_shape'] + img_show = img[:h, :w, :] + + ori_h, ori_w = img_meta['ori_shape'][:-1] + img_show = mmcv.imresize(img_show, (ori_w, ori_h)) + + if out_dir: + out_file = osp.join(out_dir, img_meta['ori_filename']) + else: + out_file = None + + model.module.show_result( + img_show, + result, + palette=dataset.PALETTE, + show=show, + out_file=out_file, + opacity=opacity) + + if efficient_test: + result = [np2tmp(_, tmpdir='.efficient_test') for _ in result] + + if format_only: + result = dataset.format_results( + result, indices=batch_indices, **format_args) + if pre_eval: + # TODO: adapt samples_per_gpu > 1. + # only samples_per_gpu=1 valid now + result = dataset.pre_eval(result, indices=batch_indices) + results.extend(result) + else: + results.extend(result) + + batch_size = len(result) + for _ in range(batch_size): + prog_bar.update() + + return results + + +def multi_gpu_test(model, + data_loader, + tmpdir=None, + gpu_collect=False, + efficient_test=False, + pre_eval=False, + format_only=False, + format_args={}): + """Test model with multiple gpus by progressive mode. + + This method tests model with multiple gpus and collects the results + under two different modes: gpu and cpu modes. By setting 'gpu_collect=True' + it encodes results to gpu tensors and use gpu communication for results + collection. On cpu mode it saves the results on different gpus to 'tmpdir' + and collects them by the rank 0 worker. 
+
+    Args:
+        model (nn.Module): Model to be tested.
+        data_loader (utils.data.Dataloader): Pytorch data loader.
+        tmpdir (str): Path of directory to save the temporary results from
+            different gpus under cpu mode. The same path is used for
+            efficient test. Default: None.
+        gpu_collect (bool): Option to use either gpu or cpu to collect
+            results. Default: False.
+        efficient_test (bool): Whether to save the results as local numpy
+            files to save CPU memory during evaluation. Mutually exclusive
+            with pre_eval and format_results. Default: False.
+        pre_eval (bool): Use dataset.pre_eval() function to generate
+            pre_results for metric evaluation. Mutually exclusive with
+            efficient_test and format_results. Default: False.
+        format_only (bool): Only format result for results commit.
+            Mutually exclusive with pre_eval and efficient_test.
+            Default: False.
+        format_args (dict): The args for format_results. Default: {}.
+
+    Returns:
+        list: list of evaluation pre-results or list of save file names.
+    """
+    if efficient_test:
+        warnings.warn(
+            'DeprecationWarning: ``efficient_test`` will be deprecated, the '
+            'evaluation is CPU memory friendly with pre_eval=True')
+        mmcv.mkdir_or_exist('.efficient_test')
+    # when none of them is set true, return segmentation results as
+    # a list of np.array.
+    assert [efficient_test, pre_eval, format_only].count(True) <= 1, \
+        '``efficient_test``, ``pre_eval`` and ``format_only`` are mutually ' \
+        'exclusive, only one of them can be true.'
+
+    model.eval()
+    results = []
+    dataset = data_loader.dataset
+    # The pipeline about how the data_loader retrieves samples from dataset:
+    # sampler -> batch_sampler -> indices
+    # The indices are passed to dataset_fetcher to get data from dataset.
+    # data_fetcher -> collate_fn(dataset[index]) -> data_sample
+    # we use batch_sampler to get the correct data idx
+
+    # batch_sampler based on DistributedSampler, the indices only point to
+    # data samples of the related machine.
+    loader_indices = data_loader.batch_sampler
+
+    rank, world_size = get_dist_info()
+    if rank == 0:
+        prog_bar = mmcv.ProgressBar(len(dataset))
+
+    for batch_indices, data in zip(loader_indices, data_loader):
+        with torch.no_grad():
+            result = model(return_loss=False, rescale=True, **data)
+
+        if efficient_test:
+            result = [np2tmp(_, tmpdir='.efficient_test') for _ in result]
+
+        if format_only:
+            result = dataset.format_results(
+                result, indices=batch_indices, **format_args)
+        if pre_eval:
+            # TODO: adapt samples_per_gpu > 1.
+            # only samples_per_gpu=1 valid now
+            result = dataset.pre_eval(result, indices=batch_indices)
+
+        results.extend(result)
+
+        if rank == 0:
+            batch_size = len(result) * world_size
+            for _ in range(batch_size):
+                prog_bar.update()
+
+    # collect results from all ranks
+    if gpu_collect:
+        results = collect_results_gpu(results, len(dataset))
+    else:
+        results = collect_results_cpu(results, len(dataset), tmpdir)
+    return results
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/apis/train.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/apis/train.py
new file mode 100644
index 0000000..be8e422
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/apis/train.py
@@ -0,0 +1,194 @@
+# Copyright (c) OpenMMLab. All rights reserved.
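+# Usage sketch for `train_segmentor` below (illustrative only; the config
+# path is a placeholder, and the cfg is assumed to carry the usual mmseg
+# fields such as work_dir, gpu_ids, seed, optimizer and runner):
+#
+#   from mmcv import Config
+#   from mmseg.datasets import build_dataset
+#   from mmseg.models import build_segmentor
+#   from mmseg.apis import train_segmentor
+#
+#   cfg = Config.fromfile('configs/placeholder_config.py')
+#   model = build_segmentor(cfg.model)
+#   datasets = [build_dataset(cfg.data.train)]
+#   train_segmentor(model, datasets, cfg, distributed=False, validate=True)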
+import os +import random +import warnings + +import mmcv +import numpy as np +import torch +import torch.distributed as dist +from mmcv.runner import (HOOKS, DistSamplerSeedHook, EpochBasedRunner, + build_runner, get_dist_info) +from mmcv.utils import build_from_cfg + +from mmseg import digit_version +from mmseg.core import DistEvalHook, EvalHook, build_optimizer +from mmseg.datasets import build_dataloader, build_dataset +from mmseg.utils import (build_ddp, build_dp, find_latest_checkpoint, + get_root_logger) + + +def init_random_seed(seed=None, device='cuda'): + """Initialize random seed. + + If the seed is not set, the seed will be automatically randomized, + and then broadcast to all processes to prevent some potential bugs. + Args: + seed (int, Optional): The seed. Default to None. + device (str): The device where the seed will be put on. + Default to 'cuda'. + Returns: + int: Seed to be used. + """ + if seed is not None: + return seed + + # Make sure all ranks share the same random seed to prevent + # some potential bugs. Please refer to + # https://github.com/open-mmlab/mmdetection/issues/6339 + rank, world_size = get_dist_info() + seed = np.random.randint(2**31) + if world_size == 1: + return seed + + if rank == 0: + random_num = torch.tensor(seed, dtype=torch.int32, device=device) + else: + random_num = torch.tensor(0, dtype=torch.int32, device=device) + dist.broadcast(random_num, src=0) + return random_num.item() + + +def set_random_seed(seed, deterministic=False): + """Set random seed. + + Args: + seed (int): Seed to be used. + deterministic (bool): Whether to set the deterministic option for + CUDNN backend, i.e., set `torch.backends.cudnn.deterministic` + to True and `torch.backends.cudnn.benchmark` to False. + Default: False. + """ + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + if deterministic: + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + + +def train_segmentor(model, + dataset, + cfg, + distributed=False, + validate=False, + timestamp=None, + meta=None): + """Launch segmentor training.""" + logger = get_root_logger(cfg.log_level) + + # prepare data loaders + dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset] + # The default loader config + loader_cfg = dict( + # cfg.gpus will be ignored if distributed + num_gpus=len(cfg.gpu_ids), + dist=distributed, + seed=cfg.seed, + drop_last=True) + # The overall dataloader settings + loader_cfg.update({ + k: v + for k, v in cfg.data.items() if k not in [ + 'train', 'val', 'test', 'train_dataloader', 'val_dataloader', + 'test_dataloader' + ] + }) + + # The specific dataloader settings + train_loader_cfg = {**loader_cfg, **cfg.data.get('train_dataloader', {})} + data_loaders = [build_dataloader(ds, **train_loader_cfg) for ds in dataset] + + # put model on devices + if distributed: + find_unused_parameters = cfg.get('find_unused_parameters', False) + # Sets the `find_unused_parameters` parameter in + # DDP wrapper + model = build_ddp( + model, + cfg.device, + device_ids=[int(os.environ['LOCAL_RANK'])], + broadcast_buffers=False, + find_unused_parameters=find_unused_parameters) + else: + if not torch.cuda.is_available(): + assert digit_version(mmcv.__version__) >= digit_version('1.4.4'), \ + 'Please use MMCV >= 1.4.4 for CPU training!' 
+        model = build_dp(model, cfg.device, device_ids=cfg.gpu_ids)
+
+    # build runner
+    optimizer = build_optimizer(model, cfg.optimizer)
+
+    if cfg.get('runner') is None:
+        cfg.runner = {'type': 'IterBasedRunner', 'max_iters': cfg.total_iters}
+        warnings.warn(
+            'config is now expected to have a `runner` section, '
+            'please set `runner` in your config.', UserWarning)
+
+    runner = build_runner(
+        cfg.runner,
+        default_args=dict(
+            model=model,
+            batch_processor=None,
+            optimizer=optimizer,
+            work_dir=cfg.work_dir,
+            logger=logger,
+            meta=meta))
+
+    # register hooks
+    runner.register_training_hooks(cfg.lr_config, cfg.optimizer_config,
+                                   cfg.checkpoint_config, cfg.log_config,
+                                   cfg.get('momentum_config', None))
+    if distributed:
+        # When training distributed by epoch, use `DistSamplerSeedHook` to
+        # set a different seed for the distributed sampler at each epoch;
+        # this shuffles the dataset every epoch and helps avoid overfitting.
+        if isinstance(runner, EpochBasedRunner):
+            runner.register_hook(DistSamplerSeedHook())
+
+    # an ugly workaround to make the .log and .log.json filenames the same
+    runner.timestamp = timestamp
+
+    # register eval hooks
+    if validate:
+        val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
+        # The specific dataloader settings
+        val_loader_cfg = {
+            **loader_cfg,
+            'samples_per_gpu': 1,
+            'shuffle': False,  # Not shuffled by default
+            **cfg.data.get('val_dataloader', {}),
+        }
+        val_dataloader = build_dataloader(val_dataset, **val_loader_cfg)
+        eval_cfg = cfg.get('evaluation', {})
+        eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner'
+        eval_hook = DistEvalHook if distributed else EvalHook
+        # In this PR (https://github.com/open-mmlab/mmcv/pull/1193), the
+        # priority of IterTimerHook has been modified from 'NORMAL' to 'LOW'.
+        runner.register_hook(
+            eval_hook(val_dataloader, **eval_cfg), priority='LOW')
+
+    # user-defined hooks
+    if cfg.get('custom_hooks', None):
+        custom_hooks = cfg.custom_hooks
+        assert isinstance(custom_hooks, list), \
+            f'custom_hooks expect list type, but got {type(custom_hooks)}'
+        for hook_cfg in cfg.custom_hooks:
+            assert isinstance(hook_cfg, dict), \
+                'Each item in custom_hooks expects dict type, but got ' \
+                f'{type(hook_cfg)}'
+            hook_cfg = hook_cfg.copy()
+            priority = hook_cfg.pop('priority', 'NORMAL')
+            hook = build_from_cfg(hook_cfg, HOOKS)
+            runner.register_hook(hook, priority=priority)
+
+    if cfg.resume_from is None and cfg.get('auto_resume'):
+        resume_from = find_latest_checkpoint(cfg.work_dir)
+        if resume_from is not None:
+            cfg.resume_from = resume_from
+    if cfg.resume_from:
+        runner.resume(cfg.resume_from)
+    elif cfg.load_from:
+        runner.load_checkpoint(cfg.load_from)
+    runner.run(data_loaders, cfg.workflow)
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/__init__.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/__init__.py
new file mode 100644
index 0000000..1a077d2
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) OpenMMLab. All rights reserved.
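+# Usage sketch for the optimizer builder re-exported from this package
+# (illustrative; `model` and the SGD settings are placeholders, not fixed
+# by this module):
+#
+#   from mmseg.core import build_optimizer
+#
+#   optimizer_cfg = dict(
+#       type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
+#   optimizer = build_optimizer(model, optimizer_cfg)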
+from .builder import (OPTIMIZER_BUILDERS, build_optimizer,
+                      build_optimizer_constructor)
+from .evaluation import *  # noqa: F401, F403
+from .optimizers import *  # noqa: F401, F403
+from .seg import *  # noqa: F401, F403
+from .utils import *  # noqa: F401, F403
+
+__all__ = [
+    'OPTIMIZER_BUILDERS', 'build_optimizer', 'build_optimizer_constructor'
+]
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/builder.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/builder.py
new file mode 100644
index 0000000..406dd9b
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/builder.py
@@ -0,0 +1,33 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import copy
+
+from mmcv.runner.optimizer import OPTIMIZER_BUILDERS as MMCV_OPTIMIZER_BUILDERS
+from mmcv.utils import Registry, build_from_cfg
+
+OPTIMIZER_BUILDERS = Registry(
+    'optimizer builder', parent=MMCV_OPTIMIZER_BUILDERS)
+
+
+def build_optimizer_constructor(cfg):
+    constructor_type = cfg.get('type')
+    if constructor_type in OPTIMIZER_BUILDERS:
+        return build_from_cfg(cfg, OPTIMIZER_BUILDERS)
+    elif constructor_type in MMCV_OPTIMIZER_BUILDERS:
+        return build_from_cfg(cfg, MMCV_OPTIMIZER_BUILDERS)
+    else:
+        raise KeyError(f'{constructor_type} is not registered '
+                       'in the optimizer builder registry.')
+
+
+def build_optimizer(model, cfg):
+    optimizer_cfg = copy.deepcopy(cfg)
+    constructor_type = optimizer_cfg.pop('constructor',
+                                         'DefaultOptimizerConstructor')
+    paramwise_cfg = optimizer_cfg.pop('paramwise_cfg', None)
+    optim_constructor = build_optimizer_constructor(
+        dict(
+            type=constructor_type,
+            optimizer_cfg=optimizer_cfg,
+            paramwise_cfg=paramwise_cfg))
+    optimizer = optim_constructor(model)
+    return optimizer
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/evaluation/__init__.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/evaluation/__init__.py
new file mode 100644
index 0000000..3d16d17
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/evaluation/__init__.py
@@ -0,0 +1,11 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .class_names import get_classes, get_palette
+from .eval_hooks import DistEvalHook, EvalHook
+from .metrics import (eval_metrics, intersect_and_union, mean_dice,
+                      mean_fscore, mean_iou, pre_eval_to_metrics)
+
+__all__ = [
+    'EvalHook', 'DistEvalHook', 'mean_dice', 'mean_iou', 'mean_fscore',
+    'eval_metrics', 'get_classes', 'get_palette', 'pre_eval_to_metrics',
+    'intersect_and_union'
+]
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/evaluation/class_names.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/evaluation/class_names.py
new file mode 100644
index 0000000..f04d458
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/evaluation/class_names.py
@@ -0,0 +1,324 @@
+# Copyright (c) OpenMMLab. All rights reserved.
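+# Usage sketch for the lookup helpers defined below (illustrative):
+#
+#   from mmseg.core.evaluation import get_classes, get_palette
+#
+#   classes = get_classes('cityscapes')  # 19 Cityscapes class names
+#   palette = get_palette('cityscapes')  # matching list of RGB triplets
+#   assert len(classes) == len(palette)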
+import mmcv + + +def cityscapes_classes(): + """Cityscapes class names for external use.""" + return [ + 'road', 'sidewalk', 'building', 'wall', 'fence', 'pole', + 'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky', + 'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle', + 'bicycle' + ] + + +def ade_classes(): + """ADE20K class names for external use.""" + return [ + 'wall', 'building', 'sky', 'floor', 'tree', 'ceiling', 'road', 'bed ', + 'windowpane', 'grass', 'cabinet', 'sidewalk', 'person', 'earth', + 'door', 'table', 'mountain', 'plant', 'curtain', 'chair', 'car', + 'water', 'painting', 'sofa', 'shelf', 'house', 'sea', 'mirror', 'rug', + 'field', 'armchair', 'seat', 'fence', 'desk', 'rock', 'wardrobe', + 'lamp', 'bathtub', 'railing', 'cushion', 'base', 'box', 'column', + 'signboard', 'chest of drawers', 'counter', 'sand', 'sink', + 'skyscraper', 'fireplace', 'refrigerator', 'grandstand', 'path', + 'stairs', 'runway', 'case', 'pool table', 'pillow', 'screen door', + 'stairway', 'river', 'bridge', 'bookcase', 'blind', 'coffee table', + 'toilet', 'flower', 'book', 'hill', 'bench', 'countertop', 'stove', + 'palm', 'kitchen island', 'computer', 'swivel chair', 'boat', 'bar', + 'arcade machine', 'hovel', 'bus', 'towel', 'light', 'truck', 'tower', + 'chandelier', 'awning', 'streetlight', 'booth', 'television receiver', + 'airplane', 'dirt track', 'apparel', 'pole', 'land', 'bannister', + 'escalator', 'ottoman', 'bottle', 'buffet', 'poster', 'stage', 'van', + 'ship', 'fountain', 'conveyer belt', 'canopy', 'washer', 'plaything', + 'swimming pool', 'stool', 'barrel', 'basket', 'waterfall', 'tent', + 'bag', 'minibike', 'cradle', 'oven', 'ball', 'food', 'step', 'tank', + 'trade name', 'microwave', 'pot', 'animal', 'bicycle', 'lake', + 'dishwasher', 'screen', 'blanket', 'sculpture', 'hood', 'sconce', + 'vase', 'traffic light', 'tray', 'ashcan', 'fan', 'pier', 'crt screen', + 'plate', 'monitor', 'bulletin board', 'shower', 'radiator', 'glass', + 'clock', 'flag' + ] + + +def voc_classes(): + """Pascal VOC class names for external use.""" + return [ + 'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', + 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', + 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', + 'tvmonitor' + ] + + +def cocostuff_classes(): + """CocoStuff class names for external use.""" + return [ + 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', + 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', + 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', + 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', + 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', + 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', + 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', + 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', + 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', + 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', + 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', + 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', + 'scissors', 'teddy bear', 'hair drier', 'toothbrush', 'banner', + 'blanket', 'branch', 'bridge', 'building-other', 'bush', 'cabinet', + 'cage', 'cardboard', 'carpet', 'ceiling-other', 'ceiling-tile', + 'cloth', 'clothes', 'clouds', 'counter', 'cupboard', 'curtain', + 'desk-stuff', 'dirt', 'door-stuff', 'fence', 
'floor-marble', + 'floor-other', 'floor-stone', 'floor-tile', 'floor-wood', 'flower', + 'fog', 'food-other', 'fruit', 'furniture-other', 'grass', 'gravel', + 'ground-other', 'hill', 'house', 'leaves', 'light', 'mat', 'metal', + 'mirror-stuff', 'moss', 'mountain', 'mud', 'napkin', 'net', 'paper', + 'pavement', 'pillow', 'plant-other', 'plastic', 'platform', + 'playingfield', 'railing', 'railroad', 'river', 'road', 'rock', 'roof', + 'rug', 'salad', 'sand', 'sea', 'shelf', 'sky-other', 'skyscraper', + 'snow', 'solid-other', 'stairs', 'stone', 'straw', 'structural-other', + 'table', 'tent', 'textile-other', 'towel', 'tree', 'vegetable', + 'wall-brick', 'wall-concrete', 'wall-other', 'wall-panel', + 'wall-stone', 'wall-tile', 'wall-wood', 'water-other', 'waterdrops', + 'window-blind', 'window-other', 'wood' + ] + + +def loveda_classes(): + """LoveDA class names for external use.""" + return [ + 'background', 'building', 'road', 'water', 'barren', 'forest', + 'agricultural' + ] + + +def potsdam_classes(): + """Potsdam class names for external use.""" + return [ + 'impervious_surface', 'building', 'low_vegetation', 'tree', 'car', + 'clutter' + ] + + +def vaihingen_classes(): + """Vaihingen class names for external use.""" + return [ + 'impervious_surface', 'building', 'low_vegetation', 'tree', 'car', + 'clutter' + ] + + +def isaid_classes(): + """iSAID class names for external use.""" + return [ + 'background', 'ship', 'store_tank', 'baseball_diamond', 'tennis_court', + 'basketball_court', 'Ground_Track_Field', 'Bridge', 'Large_Vehicle', + 'Small_Vehicle', 'Helicopter', 'Swimming_pool', 'Roundabout', + 'Soccer_ball_field', 'plane', 'Harbor' + ] + + +def stare_classes(): + """stare class names for external use.""" + return ['background', 'vessel'] + + +def cityscapes_palette(): + """Cityscapes palette for external use.""" + return [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], + [190, 153, 153], [153, 153, 153], [250, 170, 30], [220, 220, 0], + [107, 142, 35], [152, 251, 152], [70, 130, 180], [220, 20, 60], + [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100], [0, 80, 100], + [0, 0, 230], [119, 11, 32]] + + +def ade_palette(): + """ADE20K palette for external use.""" + return [[120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50], + [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255], + [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7], + [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82], + [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3], + [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255], + [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220], + [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224], + [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255], + [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7], + [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153], + [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255], + [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0], + [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255], + [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255], + [11, 200, 200], [255, 82, 0], [0, 255, 245], [0, 61, 255], + [0, 255, 112], [0, 255, 133], [255, 0, 0], [255, 163, 0], + [255, 102, 0], [194, 255, 0], [0, 143, 255], [51, 255, 0], + [0, 82, 255], [0, 255, 41], [0, 255, 173], [10, 0, 255], + [173, 255, 0], [0, 255, 153], [255, 92, 0], [255, 0, 255], + [255, 0, 245], [255, 0, 102], [255, 173, 0], [255, 0, 20], + [255, 184, 184], [0, 31, 255], [0, 255, 
61], [0, 71, 255], + [255, 0, 204], [0, 255, 194], [0, 255, 82], [0, 10, 255], + [0, 112, 255], [51, 0, 255], [0, 194, 255], [0, 122, 255], + [0, 255, 163], [255, 153, 0], [0, 255, 10], [255, 112, 0], + [143, 255, 0], [82, 0, 255], [163, 255, 0], [255, 235, 0], + [8, 184, 170], [133, 0, 255], [0, 255, 92], [184, 0, 255], + [255, 0, 31], [0, 184, 255], [0, 214, 255], [255, 0, 112], + [92, 255, 0], [0, 224, 255], [112, 224, 255], [70, 184, 160], + [163, 0, 255], [153, 0, 255], [71, 255, 0], [255, 0, 163], + [255, 204, 0], [255, 0, 143], [0, 255, 235], [133, 255, 0], + [255, 0, 235], [245, 0, 255], [255, 0, 122], [255, 245, 0], + [10, 190, 212], [214, 255, 0], [0, 204, 255], [20, 0, 255], + [255, 255, 0], [0, 153, 255], [0, 41, 255], [0, 255, 204], + [41, 0, 255], [41, 255, 0], [173, 0, 255], [0, 245, 255], + [71, 0, 255], [122, 0, 255], [0, 255, 184], [0, 92, 255], + [184, 255, 0], [0, 133, 255], [255, 214, 0], [25, 194, 194], + [102, 255, 0], [92, 0, 255]] + + +def voc_palette(): + """Pascal VOC palette for external use.""" + return [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128], + [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0], + [192, 0, 0], [64, 128, 0], [192, 128, 0], [64, 0, 128], + [192, 0, 128], [64, 128, 128], [192, 128, 128], [0, 64, 0], + [128, 64, 0], [0, 192, 0], [128, 192, 0], [0, 64, 128]] + + +def cocostuff_palette(): + """CocoStuff palette for external use.""" + return [[0, 192, 64], [0, 192, 64], [0, 64, 96], [128, 192, 192], + [0, 64, 64], [0, 192, 224], [0, 192, 192], [128, 192, 64], + [0, 192, 96], [128, 192, 64], [128, 32, 192], [0, 0, 224], + [0, 0, 64], [0, 160, 192], [128, 0, 96], [128, 0, 192], + [0, 32, 192], [128, 128, 224], [0, 0, 192], [128, 160, 192], + [128, 128, 0], [128, 0, 32], [128, 32, 0], [128, 0, 128], + [64, 128, 32], [0, 160, 0], [0, 0, 0], [192, 128, 160], [0, 32, 0], + [0, 128, 128], [64, 128, 160], [128, 160, 0], [0, 128, 0], + [192, 128, 32], [128, 96, 128], [0, 0, 128], [64, 0, 32], + [0, 224, 128], [128, 0, 0], [192, 0, 160], [0, 96, 128], + [128, 128, 128], [64, 0, 160], [128, 224, 128], [128, 128, 64], + [192, 0, 32], [128, 96, 0], [128, 0, 192], [0, 128, 32], + [64, 224, 0], [0, 0, 64], [128, 128, 160], [64, 96, 0], + [0, 128, 192], [0, 128, 160], [192, 224, 0], [0, 128, 64], + [128, 128, 32], [192, 32, 128], [0, 64, 192], [0, 0, 32], + [64, 160, 128], [128, 64, 64], [128, 0, 160], [64, 32, 128], + [128, 192, 192], [0, 0, 160], [192, 160, 128], [128, 192, 0], + [128, 0, 96], [192, 32, 0], [128, 64, 128], [64, 128, 96], + [64, 160, 0], [0, 64, 0], [192, 128, 224], [64, 32, 0], + [0, 192, 128], [64, 128, 224], [192, 160, 0], [0, 192, 0], + [192, 128, 96], [192, 96, 128], [0, 64, 128], [64, 0, 96], + [64, 224, 128], [128, 64, 0], [192, 0, 224], [64, 96, 128], + [128, 192, 128], [64, 0, 224], [192, 224, 128], [128, 192, 64], + [192, 0, 96], [192, 96, 0], [128, 64, 192], [0, 128, 96], + [0, 224, 0], [64, 64, 64], [128, 128, 224], [0, 96, 0], + [64, 192, 192], [0, 128, 224], [128, 224, 0], [64, 192, 64], + [128, 128, 96], [128, 32, 128], [64, 0, 192], [0, 64, 96], + [0, 160, 128], [192, 0, 64], [128, 64, 224], [0, 32, 128], + [192, 128, 192], [0, 64, 224], [128, 160, 128], [192, 128, 0], + [128, 64, 32], [128, 32, 64], [192, 0, 128], [64, 192, 32], + [0, 160, 64], [64, 0, 0], [192, 192, 160], [0, 32, 64], + [64, 128, 128], [64, 192, 160], [128, 160, 64], [64, 128, 0], + [192, 192, 32], [128, 96, 192], [64, 0, 128], [64, 64, 32], + [0, 224, 192], [192, 0, 0], [192, 64, 160], [0, 96, 192], + [192, 128, 128], [64, 64, 
160], [128, 224, 192], [192, 128, 64],
+            [192, 64, 32], [128, 96, 64], [192, 0, 192], [0, 192, 32],
+            [64, 224, 64], [64, 0, 64], [128, 192, 160], [64, 96, 64],
+            [64, 128, 192], [0, 192, 160], [192, 224, 64], [64, 128, 64],
+            [128, 192, 32], [192, 32, 192], [64, 64, 192], [0, 64, 32],
+            [64, 160, 192], [192, 64, 64], [128, 64, 160], [64, 32, 192],
+            [192, 192, 192], [0, 64, 160], [192, 160, 192], [192, 192, 0],
+            [128, 64, 96], [192, 32, 64], [192, 64, 128], [64, 192, 96],
+            [64, 160, 64], [64, 64, 0]]
+
+
+def loveda_palette():
+    """LoveDA palette for external use."""
+    return [[255, 255, 255], [255, 0, 0], [255, 255, 0], [0, 0, 255],
+            [159, 129, 183], [0, 255, 0], [255, 195, 128]]
+
+
+def potsdam_palette():
+    """Potsdam palette for external use."""
+    return [[255, 255, 255], [0, 0, 255], [0, 255, 255], [0, 255, 0],
+            [255, 255, 0], [255, 0, 0]]
+
+
+def vaihingen_palette():
+    """Vaihingen palette for external use."""
+    return [[255, 255, 255], [0, 0, 255], [0, 255, 255], [0, 255, 0],
+            [255, 255, 0], [255, 0, 0]]
+
+
+def isaid_palette():
+    """iSAID palette for external use."""
+    return [[0, 0, 0], [0, 0, 63], [0, 63, 63], [0, 63, 0], [0, 63, 127],
+            [0, 63, 191], [0, 63, 255], [0, 127, 63], [0, 127, 127],
+            [0, 0, 127], [0, 0, 191], [0, 0, 255], [0, 191, 127],
+            [0, 127, 191], [0, 127, 255], [0, 100, 155]]
+
+
+def stare_palette():
+    """STARE palette for external use."""
+    return [[120, 120, 120], [6, 230, 230]]
+
+
+dataset_aliases = {
+    'cityscapes': ['cityscapes'],
+    'ade': ['ade', 'ade20k'],
+    'voc': ['voc', 'pascal_voc', 'voc12', 'voc12aug'],
+    'loveda': ['loveda'],
+    'potsdam': ['potsdam'],
+    'vaihingen': ['vaihingen'],
+    'cocostuff': [
+        'cocostuff', 'cocostuff10k', 'cocostuff164k', 'coco-stuff',
+        'coco-stuff10k', 'coco-stuff164k', 'coco_stuff', 'coco_stuff10k',
+        'coco_stuff164k'
+    ],
+    'isaid': ['isaid', 'iSAID'],
+    'stare': ['stare', 'STARE']
+}
+
+
+def CustomDataset_classes():
+    """Custom dataset class names for external use."""
+    return ['background', 'black', 'silver', 'rainbow']
+
+
+def CustomDataset_palette():
+    """Custom dataset palette for external use."""
+    return [[0, 0, 0], [1, 1, 1], [2, 2, 2], [3, 3, 3]]
+
+
+def get_classes(dataset):
+    """Get class names of a dataset."""
+    alias2name = {}
+    for name, aliases in dataset_aliases.items():
+        for alias in aliases:
+            alias2name[alias] = name
+
+    if mmcv.is_str(dataset):
+        if dataset in alias2name:
+            labels = eval(alias2name[dataset] + '_classes()')
+        else:
+            raise ValueError(f'Unrecognized dataset: {dataset}')
+    else:
+        raise TypeError(f'dataset must be a str, but got {type(dataset)}')
+    return labels
+
+
+def get_palette(dataset):
+    """Get class palette (RGB) of a dataset."""
+    alias2name = {}
+    for name, aliases in dataset_aliases.items():
+        for alias in aliases:
+            alias2name[alias] = name
+
+    if mmcv.is_str(dataset):
+        if dataset in alias2name:
+            labels = eval(alias2name[dataset] + '_palette()')
+        else:
+            raise ValueError(f'Unrecognized dataset: {dataset}')
+    else:
+        raise TypeError(f'dataset must be a str, but got {type(dataset)}')
+    return labels
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/evaluation/eval_hooks.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/evaluation/eval_hooks.py
new file mode 100644
index 0000000..952db3b
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/evaluation/eval_hooks.py
@@ -0,0 +1,128 @@
+# Copyright (c) OpenMMLab. All rights reserved.
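+# Usage sketch (illustrative; `runner` and `val_dataloader` are assumed to
+# be built elsewhere, e.g. in train_segmentor above):
+#
+#   from mmseg.core import EvalHook
+#
+#   runner.register_hook(
+#       EvalHook(val_dataloader, interval=4000, by_epoch=False,
+#                pre_eval=True),
+#       priority='LOW')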
+import os.path as osp
+import warnings
+
+import torch.distributed as dist
+from mmcv.runner import DistEvalHook as _DistEvalHook
+from mmcv.runner import EvalHook as _EvalHook
+from torch.nn.modules.batchnorm import _BatchNorm
+
+
+class EvalHook(_EvalHook):
+    """Single GPU EvalHook, with efficient test support.
+
+    Args:
+        by_epoch (bool): Determine whether to perform evaluation by epoch
+            or by iteration. If set to True, it will perform by epoch.
+            Otherwise, by iteration. Default: False.
+        efficient_test (bool): Whether to save the results as local numpy
+            files to save CPU memory during evaluation. Default: False.
+        pre_eval (bool): Whether to use progressive mode to evaluate the
+            model. Default: False.
+    """
+
+    greater_keys = ['mIoU', 'mAcc', 'aAcc']
+
+    def __init__(self,
+                 *args,
+                 by_epoch=False,
+                 efficient_test=False,
+                 pre_eval=False,
+                 **kwargs):
+        super().__init__(*args, by_epoch=by_epoch, **kwargs)
+        self.pre_eval = pre_eval
+        if efficient_test:
+            warnings.warn(
+                'DeprecationWarning: ``efficient_test`` for evaluation hook '
+                'is deprecated, the evaluation hook is CPU memory friendly '
+                'with ``pre_eval=True`` as argument for ``single_gpu_test()`` '
+                'function')
+
+    def _do_evaluate(self, runner):
+        """Perform evaluation and save ckpt."""
+        if not self._should_evaluate(runner):
+            return
+
+        from mmseg.apis import single_gpu_test
+        results = single_gpu_test(
+            runner.model, self.dataloader, show=False, pre_eval=self.pre_eval)
+        runner.log_buffer.clear()
+        runner.log_buffer.output['eval_iter_num'] = len(self.dataloader)
+        key_score = self.evaluate(runner, results)
+        if self.save_best:
+            self._save_ckpt(runner, key_score)
+
+
+class DistEvalHook(_DistEvalHook):
+    """Distributed EvalHook, with efficient test support.
+
+    Args:
+        by_epoch (bool): Determine whether to perform evaluation by epoch
+            or by iteration. If set to True, it will perform by epoch.
+            Otherwise, by iteration. Default: False.
+        efficient_test (bool): Whether to save the results as local numpy
+            files to save CPU memory during evaluation. Default: False.
+        pre_eval (bool): Whether to use progressive mode to evaluate the
+            model. Default: False.
+    """
+
+    greater_keys = ['mIoU', 'mAcc', 'aAcc']
+
+    def __init__(self,
+                 *args,
+                 by_epoch=False,
+                 efficient_test=False,
+                 pre_eval=False,
+                 **kwargs):
+        super().__init__(*args, by_epoch=by_epoch, **kwargs)
+        self.pre_eval = pre_eval
+        if efficient_test:
+            warnings.warn(
+                'DeprecationWarning: ``efficient_test`` for evaluation hook '
+                'is deprecated, the evaluation hook is CPU memory friendly '
+                'with ``pre_eval=True`` as argument for ``multi_gpu_test()`` '
+                'function')
+
+    def _do_evaluate(self, runner):
+        """Perform evaluation and save ckpt."""
+        # Synchronization of BatchNorm's buffers (running_mean and
+        # running_var) is not supported in the DDP of pytorch, which may
+        # cause inconsistent performance of models on different ranks, so
+        # we broadcast BatchNorm's buffers of rank 0 to the other ranks to
+        # avoid this.
+        if self.broadcast_bn_buffer:
+            model = runner.model
+            for name, module in model.named_modules():
+                if isinstance(module,
+                              _BatchNorm) and module.track_running_stats:
+                    dist.broadcast(module.running_var, 0)
+                    dist.broadcast(module.running_mean, 0)
+
+        if not self._should_evaluate(runner):
+            return
+
+        tmpdir = self.tmpdir
+        if tmpdir is None:
+            tmpdir = osp.join(runner.work_dir, '.eval_hook')
+
+        from mmseg.apis import multi_gpu_test
+        results = multi_gpu_test(
+            runner.model,
+            self.dataloader,
+            tmpdir=tmpdir,
+            gpu_collect=self.gpu_collect,
+            pre_eval=self.pre_eval)
+
+        runner.log_buffer.clear()
+
+        if runner.rank == 0:
+            print('\n')
+            runner.log_buffer.output['eval_iter_num'] = len(self.dataloader)
+            key_score = self.evaluate(runner, results)
+
+            if self.save_best:
+                self._save_ckpt(runner, key_score)
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/evaluation/metrics.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/evaluation/metrics.py
new file mode 100644
index 0000000..0d1b9f5
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/evaluation/metrics.py
@@ -0,0 +1,395 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from collections import OrderedDict
+
+import mmcv
+import numpy as np
+import torch
+
+
+def f_score(precision, recall, beta=1):
+    """Calculate the f-score value.
+
+    Args:
+        precision (float | torch.Tensor): The precision value.
+        recall (float | torch.Tensor): The recall value.
+        beta (int): Determines the weight of recall in the combined score.
+            Default: 1.
+
+    Returns:
+        torch.Tensor: The f-score value.
+    """
+    score = (1 + beta**2) * (precision * recall) / (
+        (beta**2 * precision) + recall)
+    return score
+
+
+def intersect_and_union(pred_label,
+                        label,
+                        num_classes,
+                        ignore_index,
+                        label_map=dict(),
+                        reduce_zero_label=False):
+    """Calculate Intersection and Union.
+
+    Args:
+        pred_label (ndarray | str): Prediction segmentation map
+            or predict result filename.
+        label (ndarray | str): Ground truth segmentation map
+            or label filename.
+        num_classes (int): Number of categories.
+        ignore_index (int): Index that will be ignored in evaluation.
+        label_map (dict): Mapping old labels to new labels. The parameter
+            will work only when label is str. Default: dict().
+        reduce_zero_label (bool): Whether to ignore the zero label. The
+            parameter will work only when label is str. Default: False.
+
+    Returns:
+        torch.Tensor: The intersection of prediction and ground truth
+            histogram on all classes.
+        torch.Tensor: The union of prediction and ground truth histogram on
+            all classes.
+        torch.Tensor: The prediction histogram on all classes.
+        torch.Tensor: The ground truth histogram on all classes.
+ """ + + if isinstance(pred_label, str): + pred_label = torch.from_numpy(np.load(pred_label)) + else: + pred_label = torch.from_numpy((pred_label)) + + if isinstance(label, str): + label = torch.from_numpy( + mmcv.imread(label, flag='unchanged', backend='pillow')) + else: + label = torch.from_numpy(label) + + if label_map is not None: + for old_id, new_id in label_map.items(): + label[label == old_id] = new_id + if reduce_zero_label: + label[label == 0] = 255 + label = label - 1 + label[label == 254] = 255 + + mask = (label != ignore_index) + pred_label = pred_label[mask] + label = label[mask] + + intersect = pred_label[pred_label == label] + area_intersect = torch.histc( + intersect.float(), bins=(num_classes), min=0, max=num_classes - 1) + area_pred_label = torch.histc( + pred_label.float(), bins=(num_classes), min=0, max=num_classes - 1) + area_label = torch.histc( + label.float(), bins=(num_classes), min=0, max=num_classes - 1) + area_union = area_pred_label + area_label - area_intersect + return area_intersect, area_union, area_pred_label, area_label + + +def total_intersect_and_union(results, + gt_seg_maps, + num_classes, + ignore_index, + label_map=dict(), + reduce_zero_label=False): + """Calculate Total Intersection and Union. + + Args: + results (list[ndarray] | list[str]): List of prediction segmentation + maps or list of prediction result filenames. + gt_seg_maps (list[ndarray] | list[str] | Iterables): list of ground + truth segmentation maps or list of label filenames. + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + label_map (dict): Mapping old labels to new labels. Default: dict(). + reduce_zero_label (bool): Whether ignore zero label. Default: False. + + Returns: + ndarray: The intersection of prediction and ground truth histogram + on all classes. + ndarray: The union of prediction and ground truth histogram on all + classes. + ndarray: The prediction histogram on all classes. + ndarray: The ground truth histogram on all classes. + """ + total_area_intersect = torch.zeros((num_classes, ), dtype=torch.float64) + total_area_union = torch.zeros((num_classes, ), dtype=torch.float64) + total_area_pred_label = torch.zeros((num_classes, ), dtype=torch.float64) + total_area_label = torch.zeros((num_classes, ), dtype=torch.float64) + for result, gt_seg_map in zip(results, gt_seg_maps): + area_intersect, area_union, area_pred_label, area_label = \ + intersect_and_union( + result, gt_seg_map, num_classes, ignore_index, + label_map, reduce_zero_label) + total_area_intersect += area_intersect + total_area_union += area_union + total_area_pred_label += area_pred_label + total_area_label += area_label + return total_area_intersect, total_area_union, total_area_pred_label, \ + total_area_label + + +def mean_iou(results, + gt_seg_maps, + num_classes, + ignore_index, + nan_to_num=None, + label_map=dict(), + reduce_zero_label=False): + """Calculate Mean Intersection and Union (mIoU) + + Args: + results (list[ndarray] | list[str]): List of prediction segmentation + maps or list of prediction result filenames. + gt_seg_maps (list[ndarray] | list[str]): list of ground truth + segmentation maps or list of label filenames. + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + nan_to_num (int, optional): If specified, NaN values will be replaced + by the numbers defined by the user. Default: None. + label_map (dict): Mapping old labels to new labels. Default: dict(). 
+        reduce_zero_label (bool): Whether to ignore the zero label.
+            Default: False.
+
+    Returns:
+        dict[str, float | ndarray]:
+            float: Overall accuracy on all images.
+            ndarray: Per category accuracy, shape (num_classes, ).
+            ndarray: Per category IoU, shape (num_classes, ).
+    """
+    iou_result = eval_metrics(
+        results=results,
+        gt_seg_maps=gt_seg_maps,
+        num_classes=num_classes,
+        ignore_index=ignore_index,
+        metrics=['mIoU'],
+        nan_to_num=nan_to_num,
+        label_map=label_map,
+        reduce_zero_label=reduce_zero_label)
+    return iou_result
+
+
+def mean_dice(results,
+              gt_seg_maps,
+              num_classes,
+              ignore_index,
+              nan_to_num=None,
+              label_map=dict(),
+              reduce_zero_label=False):
+    """Calculate Mean Dice (mDice).
+
+    Args:
+        results (list[ndarray] | list[str]): List of prediction segmentation
+            maps or list of prediction result filenames.
+        gt_seg_maps (list[ndarray] | list[str]): list of ground truth
+            segmentation maps or list of label filenames.
+        num_classes (int): Number of categories.
+        ignore_index (int): Index that will be ignored in evaluation.
+        nan_to_num (int, optional): If specified, NaN values will be
+            replaced by the numbers defined by the user. Default: None.
+        label_map (dict): Mapping old labels to new labels. Default: dict().
+        reduce_zero_label (bool): Whether to ignore the zero label.
+            Default: False.
+
+    Returns:
+        dict[str, float | ndarray]: Default metrics.
+            float: Overall accuracy on all images.
+            ndarray: Per category accuracy, shape (num_classes, ).
+            ndarray: Per category dice, shape (num_classes, ).
+    """
+
+    dice_result = eval_metrics(
+        results=results,
+        gt_seg_maps=gt_seg_maps,
+        num_classes=num_classes,
+        ignore_index=ignore_index,
+        metrics=['mDice'],
+        nan_to_num=nan_to_num,
+        label_map=label_map,
+        reduce_zero_label=reduce_zero_label)
+    return dice_result
+
+
+def mean_fscore(results,
+                gt_seg_maps,
+                num_classes,
+                ignore_index,
+                nan_to_num=None,
+                label_map=dict(),
+                reduce_zero_label=False,
+                beta=1):
+    """Calculate Mean F-Score (mFscore).
+
+    Args:
+        results (list[ndarray] | list[str]): List of prediction segmentation
+            maps or list of prediction result filenames.
+        gt_seg_maps (list[ndarray] | list[str]): list of ground truth
+            segmentation maps or list of label filenames.
+        num_classes (int): Number of categories.
+        ignore_index (int): Index that will be ignored in evaluation.
+        nan_to_num (int, optional): If specified, NaN values will be
+            replaced by the numbers defined by the user. Default: None.
+        label_map (dict): Mapping old labels to new labels. Default: dict().
+        reduce_zero_label (bool): Whether to ignore the zero label.
+            Default: False.
+        beta (int): Determines the weight of recall in the combined score.
+            Default: 1.
+
+    Returns:
+        dict[str, float | ndarray]: Default metrics.
+            float: Overall accuracy on all images.
+            ndarray: Per category recall, shape (num_classes, ).
+            ndarray: Per category precision, shape (num_classes, ).
+            ndarray: Per category f-score, shape (num_classes, ).
+ """ + fscore_result = eval_metrics( + results=results, + gt_seg_maps=gt_seg_maps, + num_classes=num_classes, + ignore_index=ignore_index, + metrics=['mFscore'], + nan_to_num=nan_to_num, + label_map=label_map, + reduce_zero_label=reduce_zero_label, + beta=beta) + return fscore_result + + +def eval_metrics(results, + gt_seg_maps, + num_classes, + ignore_index, + metrics=['mIoU'], + nan_to_num=None, + label_map=dict(), + reduce_zero_label=False, + beta=1): + """Calculate evaluation metrics + Args: + results (list[ndarray] | list[str]): List of prediction segmentation + maps or list of prediction result filenames. + gt_seg_maps (list[ndarray] | list[str] | Iterables): list of ground + truth segmentation maps or list of label filenames. + num_classes (int): Number of categories. + ignore_index (int): Index that will be ignored in evaluation. + metrics (list[str] | str): Metrics to be evaluated, 'mIoU' and 'mDice'. + nan_to_num (int, optional): If specified, NaN values will be replaced + by the numbers defined by the user. Default: None. + label_map (dict): Mapping old labels to new labels. Default: dict(). + reduce_zero_label (bool): Whether ignore zero label. Default: False. + Returns: + float: Overall accuracy on all images. + ndarray: Per category accuracy, shape (num_classes, ). + ndarray: Per category evaluation metrics, shape (num_classes, ). + """ + + total_area_intersect, total_area_union, total_area_pred_label, \ + total_area_label = total_intersect_and_union( + results, gt_seg_maps, num_classes, ignore_index, label_map, + reduce_zero_label) + ret_metrics = total_area_to_metrics(total_area_intersect, total_area_union, + total_area_pred_label, + total_area_label, metrics, nan_to_num, + beta) + + return ret_metrics + + +def pre_eval_to_metrics(pre_eval_results, + metrics=['mIoU'], + nan_to_num=None, + beta=1): + """Convert pre-eval results to metrics. + + Args: + pre_eval_results (list[tuple[torch.Tensor]]): per image eval results + for computing evaluation metric + metrics (list[str] | str): Metrics to be evaluated, 'mIoU' and 'mDice'. + nan_to_num (int, optional): If specified, NaN values will be replaced + by the numbers defined by the user. Default: None. + Returns: + float: Overall accuracy on all images. + ndarray: Per category accuracy, shape (num_classes, ). + ndarray: Per category evaluation metrics, shape (num_classes, ). + """ + + # convert list of tuples to tuple of lists, e.g. + # [(A_1, B_1, C_1, D_1), ..., (A_n, B_n, C_n, D_n)] to + # ([A_1, ..., A_n], ..., [D_1, ..., D_n]) + pre_eval_results = tuple(zip(*pre_eval_results)) + assert len(pre_eval_results) == 4 + + total_area_intersect = sum(pre_eval_results[0]) + total_area_union = sum(pre_eval_results[1]) + total_area_pred_label = sum(pre_eval_results[2]) + total_area_label = sum(pre_eval_results[3]) + + ret_metrics = total_area_to_metrics(total_area_intersect, total_area_union, + total_area_pred_label, + total_area_label, metrics, nan_to_num, + beta) + + return ret_metrics + + +def total_area_to_metrics(total_area_intersect, + total_area_union, + total_area_pred_label, + total_area_label, + metrics=['mIoU'], + nan_to_num=None, + beta=1): + """Calculate evaluation metrics + Args: + total_area_intersect (ndarray): The intersection of prediction and + ground truth histogram on all classes. + total_area_union (ndarray): The union of prediction and ground truth + histogram on all classes. + total_area_pred_label (ndarray): The prediction histogram on all + classes. 
+ total_area_label (ndarray): The ground truth histogram on all classes. + metrics (list[str] | str): Metrics to be evaluated, 'mIoU' and 'mDice'. + nan_to_num (int, optional): If specified, NaN values will be replaced + by the numbers defined by the user. Default: None. + Returns: + float: Overall accuracy on all images. + ndarray: Per category accuracy, shape (num_classes, ). + ndarray: Per category evaluation metrics, shape (num_classes, ). + """ + if isinstance(metrics, str): + metrics = [metrics] + allowed_metrics = ['mIoU', 'mDice', 'mFscore'] + if not set(metrics).issubset(set(allowed_metrics)): + raise KeyError('metrics {} is not supported'.format(metrics)) + + all_acc = total_area_intersect.sum() / total_area_label.sum() + ret_metrics = OrderedDict({'aAcc': all_acc}) + for metric in metrics: + if metric == 'mIoU': + iou = total_area_intersect / total_area_union + acc = total_area_intersect / total_area_label + ret_metrics['IoU'] = iou + ret_metrics['Acc'] = acc + elif metric == 'mDice': + dice = 2 * total_area_intersect / ( + total_area_pred_label + total_area_label) + acc = total_area_intersect / total_area_label + ret_metrics['Dice'] = dice + ret_metrics['Acc'] = acc + elif metric == 'mFscore': + precision = total_area_intersect / total_area_pred_label + recall = total_area_intersect / total_area_label + f_value = torch.tensor( + [f_score(x[0], x[1], beta) for x in zip(precision, recall)]) + ret_metrics['Fscore'] = f_value + ret_metrics['Precision'] = precision + ret_metrics['Recall'] = recall + + ret_metrics = { + metric: value.numpy() + for metric, value in ret_metrics.items() + } + if nan_to_num is not None: + ret_metrics = OrderedDict({ + metric: np.nan_to_num(metric_value, nan=nan_to_num) + for metric, metric_value in ret_metrics.items() + }) + return ret_metrics diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/optimizers/__init__.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/optimizers/__init__.py new file mode 100644 index 0000000..4fbf4ec --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/optimizers/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .layer_decay_optimizer_constructor import ( + LayerDecayOptimizerConstructor, LearningRateDecayOptimizerConstructor) + +__all__ = [ + 'LearningRateDecayOptimizerConstructor', 'LayerDecayOptimizerConstructor' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/optimizers/layer_decay_optimizer_constructor.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/optimizers/layer_decay_optimizer_constructor.py new file mode 100644 index 0000000..2b6b8ff --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/optimizers/layer_decay_optimizer_constructor.py @@ -0,0 +1,208 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import json +import warnings + +from mmcv.runner import DefaultOptimizerConstructor, get_dist_info + +from mmseg.utils import get_root_logger +from ..builder import OPTIMIZER_BUILDERS + + +def get_layer_id_for_convnext(var_name, max_layer_id): + """Get the layer id to set the different learning rates in ``layer_wise`` + decay_type. + + Args: + var_name (str): The key of the model. + max_layer_id (int): Maximum number of backbone layers. + + Returns: + int: The id number corresponding to different learning rate in + ``LearningRateDecayOptimizerConstructor``. 
+ """ + + if var_name in ('backbone.cls_token', 'backbone.mask_token', + 'backbone.pos_embed'): + return 0 + elif var_name.startswith('backbone.downsample_layers'): + stage_id = int(var_name.split('.')[2]) + if stage_id == 0: + layer_id = 0 + elif stage_id == 1: + layer_id = 2 + elif stage_id == 2: + layer_id = 3 + elif stage_id == 3: + layer_id = max_layer_id + return layer_id + elif var_name.startswith('backbone.stages'): + stage_id = int(var_name.split('.')[2]) + block_id = int(var_name.split('.')[3]) + if stage_id == 0: + layer_id = 1 + elif stage_id == 1: + layer_id = 2 + elif stage_id == 2: + layer_id = 3 + block_id // 3 + elif stage_id == 3: + layer_id = max_layer_id + return layer_id + else: + return max_layer_id + 1 + + +def get_stage_id_for_convnext(var_name, max_stage_id): + """Get the stage id to set the different learning rates in ``stage_wise`` + decay_type. + + Args: + var_name (str): The key of the model. + max_stage_id (int): Maximum number of backbone layers. + + Returns: + int: The id number corresponding to different learning rate in + ``LearningRateDecayOptimizerConstructor``. + """ + + if var_name in ('backbone.cls_token', 'backbone.mask_token', + 'backbone.pos_embed'): + return 0 + elif var_name.startswith('backbone.downsample_layers'): + return 0 + elif var_name.startswith('backbone.stages'): + stage_id = int(var_name.split('.')[2]) + return stage_id + 1 + else: + return max_stage_id - 1 + + +def get_layer_id_for_vit(var_name, max_layer_id): + """Get the layer id to set the different learning rates. + + Args: + var_name (str): The key of the model. + num_max_layer (int): Maximum number of backbone layers. + + Returns: + int: Returns the layer id of the key. + """ + + if var_name in ('backbone.cls_token', 'backbone.mask_token', + 'backbone.pos_embed'): + return 0 + elif var_name.startswith('backbone.patch_embed'): + return 0 + elif var_name.startswith('backbone.layers'): + layer_id = int(var_name.split('.')[2]) + return layer_id + 1 + else: + return max_layer_id - 1 + + +@OPTIMIZER_BUILDERS.register_module() +class LearningRateDecayOptimizerConstructor(DefaultOptimizerConstructor): + """Different learning rates are set for different layers of backbone. + + Note: Currently, this optimizer constructor is built for ConvNeXt, + BEiT and MAE. + """ + + def add_params(self, params, module, **kwargs): + """Add all parameters of module to the params list. + + The parameters of the given module will be added to the list of param + groups, with specific rules defined by paramwise_cfg. + + Args: + params (list[dict]): A list of param groups, it will be modified + in place. + module (nn.Module): The module to be added. + """ + logger = get_root_logger() + + parameter_groups = {} + logger.info(f'self.paramwise_cfg is {self.paramwise_cfg}') + num_layers = self.paramwise_cfg.get('num_layers') + 2 + decay_rate = self.paramwise_cfg.get('decay_rate') + decay_type = self.paramwise_cfg.get('decay_type', 'layer_wise') + logger.info('Build LearningRateDecayOptimizerConstructor ' + f'{decay_type} {decay_rate} - {num_layers}') + weight_decay = self.base_wd + for name, param in module.named_parameters(): + if not param.requires_grad: + continue # frozen weights + if len(param.shape) == 1 or name.endswith('.bias') or name in ( + 'pos_embed', 'cls_token'): + group_name = 'no_decay' + this_weight_decay = 0. 
+ else: + group_name = 'decay' + this_weight_decay = weight_decay + if 'layer_wise' in decay_type: + if 'ConvNeXt' in module.backbone.__class__.__name__: + layer_id = get_layer_id_for_convnext( + name, self.paramwise_cfg.get('num_layers')) + logger.info(f'set param {name} as id {layer_id}') + elif 'BEiT' in module.backbone.__class__.__name__ or \ + 'MAE' in module.backbone.__class__.__name__: + layer_id = get_layer_id_for_vit(name, num_layers) + logger.info(f'set param {name} as id {layer_id}') + else: + raise NotImplementedError() + elif decay_type == 'stage_wise': + if 'ConvNeXt' in module.backbone.__class__.__name__: + layer_id = get_stage_id_for_convnext(name, num_layers) + logger.info(f'set param {name} as id {layer_id}') + else: + raise NotImplementedError() + group_name = f'layer_{layer_id}_{group_name}' + + if group_name not in parameter_groups: + scale = decay_rate**(num_layers - layer_id - 1) + + parameter_groups[group_name] = { + 'weight_decay': this_weight_decay, + 'params': [], + 'param_names': [], + 'lr_scale': scale, + 'group_name': group_name, + 'lr': scale * self.base_lr, + } + + parameter_groups[group_name]['params'].append(param) + parameter_groups[group_name]['param_names'].append(name) + rank, _ = get_dist_info() + if rank == 0: + to_display = {} + for key in parameter_groups: + to_display[key] = { + 'param_names': parameter_groups[key]['param_names'], + 'lr_scale': parameter_groups[key]['lr_scale'], + 'lr': parameter_groups[key]['lr'], + 'weight_decay': parameter_groups[key]['weight_decay'], + } + logger.info(f'Param groups = {json.dumps(to_display, indent=2)}') + params.extend(parameter_groups.values()) + + +@OPTIMIZER_BUILDERS.register_module() +class LayerDecayOptimizerConstructor(LearningRateDecayOptimizerConstructor): + """Different learning rates are set for different layers of backbone. + + Note: Currently, this optimizer constructor is built for BEiT, + and it will be deprecated. + Please use ``LearningRateDecayOptimizerConstructor`` instead. + """ + + def __init__(self, optimizer_cfg, paramwise_cfg): + warnings.warn('DeprecationWarning: Original ' + 'LayerDecayOptimizerConstructor of BEiT ' + 'will be deprecated. Please use ' + 'LearningRateDecayOptimizerConstructor instead, ' + 'and set decay_type = layer_wise_vit in paramwise_cfg.') + paramwise_cfg.update({'decay_type': 'layer_wise_vit'}) + warnings.warn('DeprecationWarning: Layer_decay_rate will ' + 'be deleted, please use decay_rate instead.') + paramwise_cfg['decay_rate'] = paramwise_cfg.pop('layer_decay_rate') + super(LayerDecayOptimizerConstructor, + self).__init__(optimizer_cfg, paramwise_cfg) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/seg/__init__.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/seg/__init__.py new file mode 100644 index 0000000..5206b96 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/seg/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .builder import build_pixel_sampler +from .sampler import BasePixelSampler, OHEMPixelSampler + +__all__ = ['build_pixel_sampler', 'BasePixelSampler', 'OHEMPixelSampler'] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/seg/builder.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/seg/builder.py new file mode 100644 index 0000000..1cecd34 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/seg/builder.py @@ -0,0 +1,9 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
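+# Usage sketch (illustrative; `decode_head` stands in for the decode head
+# that owns the sampler and provides `ignore_index` / `loss_decode`):
+#
+#   from mmseg.core import build_pixel_sampler
+#
+#   sampler_cfg = dict(type='OHEMPixelSampler', thresh=0.7, min_kept=100000)
+#   sampler = build_pixel_sampler(sampler_cfg, context=decode_head)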
+from mmcv.utils import Registry, build_from_cfg
+
+PIXEL_SAMPLERS = Registry('pixel sampler')
+
+
+def build_pixel_sampler(cfg, **default_args):
+    """Build pixel sampler for segmentation map."""
+    return build_from_cfg(cfg, PIXEL_SAMPLERS, default_args)
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/seg/sampler/__init__.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/seg/sampler/__init__.py
new file mode 100644
index 0000000..5a76485
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/seg/sampler/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .base_pixel_sampler import BasePixelSampler
+from .ohem_pixel_sampler import OHEMPixelSampler
+
+__all__ = ['BasePixelSampler', 'OHEMPixelSampler']
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/seg/sampler/base_pixel_sampler.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/seg/sampler/base_pixel_sampler.py
new file mode 100644
index 0000000..03672cd
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/seg/sampler/base_pixel_sampler.py
@@ -0,0 +1,13 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from abc import ABCMeta, abstractmethod
+
+
+class BasePixelSampler(metaclass=ABCMeta):
+    """Base class of pixel sampler."""
+
+    def __init__(self, **kwargs):
+        pass
+
+    @abstractmethod
+    def sample(self, seg_logit, seg_label):
+        """Placeholder for sample function."""
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/seg/sampler/ohem_pixel_sampler.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/seg/sampler/ohem_pixel_sampler.py
new file mode 100644
index 0000000..833a287
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/seg/sampler/ohem_pixel_sampler.py
@@ -0,0 +1,85 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from ..builder import PIXEL_SAMPLERS
+from .base_pixel_sampler import BasePixelSampler
+
+
+@PIXEL_SAMPLERS.register_module()
+class OHEMPixelSampler(BasePixelSampler):
+    """Online Hard Example Mining Sampler for segmentation.
+
+    Args:
+        context (nn.Module): The context of sampler, subclass of
+            :obj:`BaseDecodeHead`.
+        thresh (float, optional): The threshold for hard example selection.
+            Predictions with confidence below this threshold are treated as
+            hard examples. If not specified, the hard examples will be the
+            pixels with the top ``min_kept`` losses. Default: None.
+        min_kept (int, optional): The minimum number of predictions to keep.
+            Default: 100000.
+    """
+
+    def __init__(self, context, thresh=None, min_kept=100000):
+        super(OHEMPixelSampler, self).__init__()
+        self.context = context
+        assert min_kept > 1
+        self.thresh = thresh
+        self.min_kept = min_kept
+
+    def sample(self, seg_logit, seg_label):
+        """Sample pixels that have high loss or low prediction confidence.
+ + Args: + seg_logit (torch.Tensor): segmentation logits, shape (N, C, H, W) + seg_label (torch.Tensor): segmentation label, shape (N, 1, H, W) + + Returns: + torch.Tensor: segmentation weight, shape (N, H, W) + """ + with torch.no_grad(): + assert seg_logit.shape[2:] == seg_label.shape[2:] + assert seg_label.shape[1] == 1 + seg_label = seg_label.squeeze(1).long() + batch_kept = self.min_kept * seg_label.size(0) + valid_mask = seg_label != self.context.ignore_index + seg_weight = seg_logit.new_zeros(size=seg_label.size()) + valid_seg_weight = seg_weight[valid_mask] + if self.thresh is not None: + seg_prob = F.softmax(seg_logit, dim=1) + + tmp_seg_label = seg_label.clone().unsqueeze(1) + tmp_seg_label[tmp_seg_label == self.context.ignore_index] = 0 + seg_prob = seg_prob.gather(1, tmp_seg_label).squeeze(1) + sort_prob, sort_indices = seg_prob[valid_mask].sort() + + if sort_prob.numel() > 0: + min_threshold = sort_prob[min(batch_kept, + sort_prob.numel() - 1)] + else: + min_threshold = 0.0 + threshold = max(min_threshold, self.thresh) + valid_seg_weight[seg_prob[valid_mask] < threshold] = 1. + else: + if not isinstance(self.context.loss_decode, nn.ModuleList): + losses_decode = [self.context.loss_decode] + else: + losses_decode = self.context.loss_decode + losses = 0.0 + for loss_module in losses_decode: + losses += loss_module( + seg_logit, + seg_label, + weight=None, + ignore_index=self.context.ignore_index, + reduction_override='none') + + # faster than topk according to https://github.com/pytorch/pytorch/issues/22812 # noqa + _, sort_indices = losses[valid_mask].sort(descending=True) + valid_seg_weight[sort_indices[:batch_kept]] = 1. + + seg_weight[valid_mask] = valid_seg_weight + + return seg_weight diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/utils/__init__.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/utils/__init__.py new file mode 100644 index 0000000..2888289 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/utils/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .dist_util import check_dist_init, sync_random_seed +from .misc import add_prefix + +__all__ = ['add_prefix', 'check_dist_init', 'sync_random_seed'] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/utils/dist_util.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/utils/dist_util.py new file mode 100644 index 0000000..b328851 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/utils/dist_util.py @@ -0,0 +1,46 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import numpy as np +import torch +import torch.distributed as dist +from mmcv.runner import get_dist_info + + +def check_dist_init(): + return dist.is_available() and dist.is_initialized() + + +def sync_random_seed(seed=None, device='cuda'): + """Make sure different ranks share the same seed. All workers must call + this function, otherwise it will deadlock. This method is generally used in + `DistributedSampler`, because the seed should be identical across all + processes in the distributed group. + + In distributed sampling, different ranks should sample non-overlapped + data in the dataset. Therefore, this function is used to make sure that + each rank shuffles the data indices in the same order based + on the same seed. Then different ranks could use different indices + to select non-overlapped data from the same data list. + + Args: + seed (int, Optional): The seed. Default to None. 
+ device (str): The device where the seed will be put on. + Default to 'cuda'. + Returns: + int: Seed to be used. + """ + + if seed is None: + seed = np.random.randint(2**31) + assert isinstance(seed, int) + + rank, world_size = get_dist_info() + + if world_size == 1: + return seed + + if rank == 0: + random_num = torch.tensor(seed, dtype=torch.int32, device=device) + else: + random_num = torch.tensor(0, dtype=torch.int32, device=device) + dist.broadcast(random_num, src=0) + return random_num.item() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/utils/misc.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/utils/misc.py new file mode 100644 index 0000000..282bb8d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/core/utils/misc.py @@ -0,0 +1,18 @@ +# Copyright (c) OpenMMLab. All rights reserved. +def add_prefix(inputs, prefix): + """Add prefix for dict. + + Args: + inputs (dict): The input dict with str keys. + prefix (str): The prefix to add. + + Returns: + + dict: The dict with keys updated with ``prefix``. + """ + + outputs = dict() + for name, value in inputs.items(): + outputs[f'{prefix}.{name}'] = value + + return outputs diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/__init__.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/__init__.py new file mode 100644 index 0000000..5d42a11 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/__init__.py @@ -0,0 +1,30 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .ade import ADE20KDataset +from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset +from .chase_db1 import ChaseDB1Dataset +from .cityscapes import CityscapesDataset +from .coco_stuff import COCOStuffDataset +from .custom import CustomDataset +from .dark_zurich import DarkZurichDataset +from .dataset_wrappers import (ConcatDataset, MultiImageMixDataset, + RepeatDataset) +from .drive import DRIVEDataset +from .hrf import HRFDataset +from .isaid import iSAIDDataset +from .isprs import ISPRSDataset +from .loveda import LoveDADataset +from .night_driving import NightDrivingDataset +from .pascal_context import PascalContextDataset, PascalContextDataset59 +from .potsdam import PotsdamDataset +from .stare import STAREDataset +from .voc import PascalVOCDataset + +__all__ = [ + 'CustomDataset', 'build_dataloader', 'ConcatDataset', 'RepeatDataset', + 'DATASETS', 'build_dataset', 'PIPELINES', 'CityscapesDataset', + 'PascalVOCDataset', 'ADE20KDataset', 'PascalContextDataset', + 'PascalContextDataset59', 'ChaseDB1Dataset', 'DRIVEDataset', 'HRFDataset', + 'STAREDataset', 'DarkZurichDataset', 'NightDrivingDataset', + 'COCOStuffDataset', 'LoveDADataset', 'MultiImageMixDataset', + 'iSAIDDataset', 'ISPRSDataset', 'PotsdamDataset' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/ade.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/ade.py new file mode 100644 index 0000000..db94ceb --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/ade.py @@ -0,0 +1,167 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp + +import mmcv +import numpy as np +from PIL import Image + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class ADE20KDataset(CustomDataset): + """ADE20K dataset. + + In segmentation map annotation for ADE20K, 0 stands for background, which + is not included in 150 categories. 
``reduce_zero_label`` is fixed to True. + The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is fixed to + '.png'. + """ + CLASSES = ( + 'wall', 'building', 'sky', 'floor', 'tree', 'ceiling', 'road', 'bed ', + 'windowpane', 'grass', 'cabinet', 'sidewalk', 'person', 'earth', + 'door', 'table', 'mountain', 'plant', 'curtain', 'chair', 'car', + 'water', 'painting', 'sofa', 'shelf', 'house', 'sea', 'mirror', 'rug', + 'field', 'armchair', 'seat', 'fence', 'desk', 'rock', 'wardrobe', + 'lamp', 'bathtub', 'railing', 'cushion', 'base', 'box', 'column', + 'signboard', 'chest of drawers', 'counter', 'sand', 'sink', + 'skyscraper', 'fireplace', 'refrigerator', 'grandstand', 'path', + 'stairs', 'runway', 'case', 'pool table', 'pillow', 'screen door', + 'stairway', 'river', 'bridge', 'bookcase', 'blind', 'coffee table', + 'toilet', 'flower', 'book', 'hill', 'bench', 'countertop', 'stove', + 'palm', 'kitchen island', 'computer', 'swivel chair', 'boat', 'bar', + 'arcade machine', 'hovel', 'bus', 'towel', 'light', 'truck', 'tower', + 'chandelier', 'awning', 'streetlight', 'booth', 'television receiver', + 'airplane', 'dirt track', 'apparel', 'pole', 'land', 'bannister', + 'escalator', 'ottoman', 'bottle', 'buffet', 'poster', 'stage', 'van', + 'ship', 'fountain', 'conveyer belt', 'canopy', 'washer', 'plaything', + 'swimming pool', 'stool', 'barrel', 'basket', 'waterfall', 'tent', + 'bag', 'minibike', 'cradle', 'oven', 'ball', 'food', 'step', 'tank', + 'trade name', 'microwave', 'pot', 'animal', 'bicycle', 'lake', + 'dishwasher', 'screen', 'blanket', 'sculpture', 'hood', 'sconce', + 'vase', 'traffic light', 'tray', 'ashcan', 'fan', 'pier', 'crt screen', + 'plate', 'monitor', 'bulletin board', 'shower', 'radiator', 'glass', + 'clock', 'flag') + + PALETTE = [[120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50], + [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255], + [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7], + [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82], + [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3], + [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255], + [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220], + [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224], + [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255], + [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7], + [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153], + [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255], + [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0], + [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255], + [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255], + [11, 200, 200], [255, 82, 0], [0, 255, 245], [0, 61, 255], + [0, 255, 112], [0, 255, 133], [255, 0, 0], [255, 163, 0], + [255, 102, 0], [194, 255, 0], [0, 143, 255], [51, 255, 0], + [0, 82, 255], [0, 255, 41], [0, 255, 173], [10, 0, 255], + [173, 255, 0], [0, 255, 153], [255, 92, 0], [255, 0, 255], + [255, 0, 245], [255, 0, 102], [255, 173, 0], [255, 0, 20], + [255, 184, 184], [0, 31, 255], [0, 255, 61], [0, 71, 255], + [255, 0, 204], [0, 255, 194], [0, 255, 82], [0, 10, 255], + [0, 112, 255], [51, 0, 255], [0, 194, 255], [0, 122, 255], + [0, 255, 163], [255, 153, 0], [0, 255, 10], [255, 112, 0], + [143, 255, 0], [82, 0, 255], [163, 255, 0], [255, 235, 0], + [8, 184, 170], [133, 0, 255], [0, 255, 92], [184, 0, 255], + [255, 0, 31], [0, 184, 255], [0, 214, 255], [255, 0, 112], + [92, 255, 0], [0, 224, 255], [112, 224, 255], [70, 184, 160], 
+               [163, 0, 255], [153, 0, 255], [71, 255, 0], [255, 0, 163],
+               [255, 204, 0], [255, 0, 143], [0, 255, 235], [133, 255, 0],
+               [255, 0, 235], [245, 0, 255], [255, 0, 122], [255, 245, 0],
+               [10, 190, 212], [214, 255, 0], [0, 204, 255], [20, 0, 255],
+               [255, 255, 0], [0, 153, 255], [0, 41, 255], [0, 255, 204],
+               [41, 0, 255], [41, 255, 0], [173, 0, 255], [0, 245, 255],
+               [71, 0, 255], [122, 0, 255], [0, 255, 184], [0, 92, 255],
+               [184, 255, 0], [0, 133, 255], [255, 214, 0], [25, 194, 194],
+               [102, 255, 0], [92, 0, 255]]
+
+    def __init__(self, **kwargs):
+        super(ADE20KDataset, self).__init__(
+            img_suffix='.jpg',
+            seg_map_suffix='.png',
+            reduce_zero_label=True,
+            **kwargs)
+
+    def results2img(self, results, imgfile_prefix, to_label_id, indices=None):
+        """Write the segmentation results to images.
+
+        Args:
+            results (list[ndarray]): Testing results of the
+                dataset.
+            imgfile_prefix (str): The filename prefix of the png files.
+                If the prefix is "somepath/xxx",
+                the png files will be named "somepath/xxx.png".
+            to_label_id (bool): whether to convert output to label_id for
+                submission.
+            indices (list[int], optional): Indices of input results, if not
+                set, all the indices of the dataset will be used.
+                Default: None.
+
+        Returns:
+            list[str]: result png files which contain the corresponding
+                semantic segmentation images.
+        """
+        if indices is None:
+            indices = list(range(len(self)))
+
+        mmcv.mkdir_or_exist(imgfile_prefix)
+        result_files = []
+        for result, idx in zip(results, indices):
+
+            filename = self.img_infos[idx]['filename']
+            basename = osp.splitext(osp.basename(filename))[0]
+
+            png_filename = osp.join(imgfile_prefix, f'{basename}.png')
+
+            # The official index range is 1 to 150, but the output range is
+            # 0 to 149 because reduce_zero_label=True, so shift the indices
+            # back by one before saving.
+            result = result + 1
+
+            output = Image.fromarray(result.astype(np.uint8))
+            output.save(png_filename)
+            result_files.append(png_filename)
+
+        return result_files
+
+    def format_results(self,
+                       results,
+                       imgfile_prefix,
+                       to_label_id=True,
+                       indices=None):
+        """Format the results into dir (standard format for ade20k evaluation).
+
+        Args:
+            results (list): Testing results of the dataset.
+            imgfile_prefix (str | None): The prefix of images files. It
+                includes the file path and the prefix of filename, e.g.,
+                "a/b/prefix".
+            to_label_id (bool): whether to convert output to label_id for
+                submission. Default: True
+            indices (list[int], optional): Indices of input results, if not
+                set, all the indices of the dataset will be used.
+                Default: None.
+
+        Returns:
+            tuple: (result_files, tmp_dir), result_files is a list containing
+                the image paths, tmp_dir is the temporary directory created
+                for saving json/png files when img_prefix is not specified.
+        """
+
+        if indices is None:
+            indices = list(range(len(self)))
+
+        assert isinstance(results, list), 'results must be a list.'
+        assert isinstance(indices, list), 'indices must be a list.'
+
+        result_files = self.results2img(results, imgfile_prefix, to_label_id,
+                                        indices)
+        return result_files
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/builder.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/builder.py
new file mode 100644
index 0000000..4d852d3
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/builder.py
@@ -0,0 +1,191 @@
+# Copyright (c) OpenMMLab. All rights reserved.
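+"""Dataset builder for mmseg.
+
+Editor's note: a minimal usage sketch of the registry/builder pattern defined
+in this module (the dataset type and paths below are illustrative
+assumptions, not part of this change)::
+
+    from mmseg.datasets import build_dataset
+
+    cfg = dict(
+        type='ADE20KDataset',
+        data_root='data/ade/ADEChallengeData2016',
+        img_dir='images/training',
+        ann_dir='annotations/training',
+        pipeline=[])  # real configs pass a full processing pipeline here
+    dataset = build_dataset(cfg)
+"""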
+import copy
+import platform
+import random
+from functools import partial
+
+import numpy as np
+import torch
+from mmcv.parallel import collate
+from mmcv.runner import get_dist_info
+from mmcv.utils import Registry, build_from_cfg, digit_version
+from torch.utils.data import DataLoader
+
+from .samplers import DistributedSampler
+
+if platform.system() != 'Windows':
+    # https://github.com/pytorch/pytorch/issues/973
+    import resource
+    rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
+    base_soft_limit = rlimit[0]
+    hard_limit = rlimit[1]
+    soft_limit = min(max(4096, base_soft_limit), hard_limit)
+    resource.setrlimit(resource.RLIMIT_NOFILE, (soft_limit, hard_limit))
+
+DATASETS = Registry('dataset')
+PIPELINES = Registry('pipeline')
+
+
+def _concat_dataset(cfg, default_args=None):
+    """Build a :obj:`ConcatDataset` by concatenating the datasets described
+    in the config."""
+    from .dataset_wrappers import ConcatDataset
+    img_dir = cfg['img_dir']
+    ann_dir = cfg.get('ann_dir', None)
+    split = cfg.get('split', None)
+    # pop 'separate_eval' since it is not a valid key for common datasets.
+    separate_eval = cfg.pop('separate_eval', True)
+    num_img_dir = len(img_dir) if isinstance(img_dir, (list, tuple)) else 1
+    if ann_dir is not None:
+        num_ann_dir = len(ann_dir) if isinstance(ann_dir, (list, tuple)) else 1
+    else:
+        num_ann_dir = 0
+    if split is not None:
+        num_split = len(split) if isinstance(split, (list, tuple)) else 1
+    else:
+        num_split = 0
+    if num_img_dir > 1:
+        assert num_img_dir == num_ann_dir or num_ann_dir == 0
+        assert num_img_dir == num_split or num_split == 0
+    else:
+        assert num_split == num_ann_dir or num_ann_dir <= 1
+    num_dset = max(num_split, num_img_dir)
+
+    datasets = []
+    for i in range(num_dset):
+        data_cfg = copy.deepcopy(cfg)
+        if isinstance(img_dir, (list, tuple)):
+            data_cfg['img_dir'] = img_dir[i]
+        if isinstance(ann_dir, (list, tuple)):
+            data_cfg['ann_dir'] = ann_dir[i]
+        if isinstance(split, (list, tuple)):
+            data_cfg['split'] = split[i]
+        datasets.append(build_dataset(data_cfg, default_args))
+
+    return ConcatDataset(datasets, separate_eval)
+
+
+def build_dataset(cfg, default_args=None):
+    """Build datasets."""
+    from .dataset_wrappers import (ConcatDataset, MultiImageMixDataset,
+                                   RepeatDataset)
+    if isinstance(cfg, (list, tuple)):
+        dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg])
+    elif cfg['type'] == 'RepeatDataset':
+        dataset = RepeatDataset(
+            build_dataset(cfg['dataset'], default_args), cfg['times'])
+    elif cfg['type'] == 'MultiImageMixDataset':
+        cp_cfg = copy.deepcopy(cfg)
+        cp_cfg['dataset'] = build_dataset(cp_cfg['dataset'])
+        cp_cfg.pop('type')
+        dataset = MultiImageMixDataset(**cp_cfg)
+    elif isinstance(cfg.get('img_dir'), (list, tuple)) or isinstance(
+            cfg.get('split', None), (list, tuple)):
+        dataset = _concat_dataset(cfg, default_args)
+    else:
+        dataset = build_from_cfg(cfg, DATASETS, default_args)
+
+    return dataset
+
+
+def build_dataloader(dataset,
+                     samples_per_gpu,
+                     workers_per_gpu,
+                     num_gpus=1,
+                     dist=True,
+                     shuffle=True,
+                     seed=None,
+                     drop_last=False,
+                     pin_memory=True,
+                     persistent_workers=True,
+                     **kwargs):
+    """Build PyTorch DataLoader.
+
+    In distributed training, each GPU/process has a dataloader.
+    In non-distributed training, there is only one dataloader for all GPUs.
+
+    Args:
+        dataset (Dataset): A PyTorch dataset.
+        samples_per_gpu (int): Number of training samples on each GPU, i.e.,
+            batch size of each GPU.
+        workers_per_gpu (int): How many subprocesses to use for data loading
+            for each GPU.
+        num_gpus (int): Number of GPUs. Only used in non-distributed
+            training.
+        dist (bool): Distributed training/test or not. Default: True.
+        shuffle (bool): Whether to shuffle the data at every epoch.
+            Default: True.
+        seed (int | None): Seed to be used. Default: None.
+        drop_last (bool): Whether to drop the last incomplete batch in epoch.
+            Default: False
+        pin_memory (bool): Whether to use pin_memory in DataLoader.
+            Default: True
+        persistent_workers (bool): If True, the data loader will not shut
+            down the worker processes after a dataset has been consumed once.
+            This keeps the worker Dataset instances alive. The argument only
+            takes effect with PyTorch>=1.7.0. Default: True
+        kwargs: any keyword argument to be used to initialize DataLoader
+
+    Returns:
+        DataLoader: A PyTorch dataloader.
+    """
+    rank, world_size = get_dist_info()
+    if dist:
+        sampler = DistributedSampler(
+            dataset, world_size, rank, shuffle=shuffle, seed=seed)
+        shuffle = False
+        batch_size = samples_per_gpu
+        num_workers = workers_per_gpu
+    else:
+        sampler = None
+        batch_size = num_gpus * samples_per_gpu
+        num_workers = num_gpus * workers_per_gpu
+
+    init_fn = partial(
+        worker_init_fn, num_workers=num_workers, rank=rank,
+        seed=seed) if seed is not None else None
+
+    if digit_version(torch.__version__) >= digit_version('1.8.0'):
+        data_loader = DataLoader(
+            dataset,
+            batch_size=batch_size,
+            sampler=sampler,
+            num_workers=num_workers,
+            collate_fn=partial(collate, samples_per_gpu=samples_per_gpu),
+            pin_memory=pin_memory,
+            shuffle=shuffle,
+            worker_init_fn=init_fn,
+            drop_last=drop_last,
+            persistent_workers=persistent_workers,
+            **kwargs)
+    else:
+        data_loader = DataLoader(
+            dataset,
+            batch_size=batch_size,
+            sampler=sampler,
+            num_workers=num_workers,
+            collate_fn=partial(collate, samples_per_gpu=samples_per_gpu),
+            pin_memory=pin_memory,
+            shuffle=shuffle,
+            worker_init_fn=init_fn,
+            drop_last=drop_last,
+            **kwargs)
+
+    return data_loader
+
+
+def worker_init_fn(worker_id, num_workers, rank, seed):
+    """Worker init func for dataloader.
+
+    The seed of each worker equals num_workers * rank + worker_id + user_seed.
+
+    Args:
+        worker_id (int): Worker id.
+        num_workers (int): Number of workers.
+        rank (int): The rank of current process.
+        seed (int): The random seed to use.
+    """
+
+    worker_seed = num_workers * rank + worker_id + seed
+    np.random.seed(worker_seed)
+    random.seed(worker_seed)
+    torch.manual_seed(worker_seed)
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/chase_db1.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/chase_db1.py
new file mode 100644
index 0000000..5cdc8d8
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/chase_db1.py
@@ -0,0 +1,27 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+
+from .builder import DATASETS
+from .custom import CustomDataset
+
+
+@DATASETS.register_module()
+class ChaseDB1Dataset(CustomDataset):
+    """Chase_db1 dataset.
+
+    In segmentation map annotation for Chase_db1, 0 stands for background,
+    which is included in 2 categories. ``reduce_zero_label`` is fixed to
+    False. The ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is
+    fixed to '_1stHO.png'.
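+
+    An illustrative (assumed) config snippet using this dataset; the paths
+    and ``train_pipeline`` name are editorial examples, not upstream docs::
+
+        dataset = dict(
+            type='ChaseDB1Dataset',
+            data_root='data/CHASE_DB1',
+            img_dir='images/training',
+            ann_dir='annotations/training',
+            pipeline=train_pipeline)  # train_pipeline defined elsewhere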
+ """ + + CLASSES = ('background', 'vessel') + + PALETTE = [[120, 120, 120], [6, 230, 230]] + + def __init__(self, **kwargs): + super(ChaseDB1Dataset, self).__init__( + img_suffix='.png', + seg_map_suffix='_1stHO.png', + reduce_zero_label=False, + **kwargs) + assert self.file_client.exists(self.img_dir) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/cityscapes.py new file mode 100644 index 0000000..ed633d0 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/cityscapes.py @@ -0,0 +1,214 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp + +import mmcv +import numpy as np +from mmcv.utils import print_log +from PIL import Image + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class CityscapesDataset(CustomDataset): + """Cityscapes dataset. + + The ``img_suffix`` is fixed to '_leftImg8bit.png' and ``seg_map_suffix`` is + fixed to '_gtFine_labelTrainIds.png' for Cityscapes dataset. + """ + + CLASSES = ('road', 'sidewalk', 'building', 'wall', 'fence', 'pole', + 'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky', + 'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle', + 'bicycle') + + PALETTE = [[128, 64, 128], [244, 35, 232], [70, 70, 70], [102, 102, 156], + [190, 153, 153], [153, 153, 153], [250, 170, 30], [220, 220, 0], + [107, 142, 35], [152, 251, 152], [70, 130, 180], [220, 20, 60], + [255, 0, 0], [0, 0, 142], [0, 0, 70], [0, 60, 100], + [0, 80, 100], [0, 0, 230], [119, 11, 32]] + + def __init__(self, + img_suffix='_leftImg8bit.png', + seg_map_suffix='_gtFine_labelTrainIds.png', + **kwargs): + super(CityscapesDataset, self).__init__( + img_suffix=img_suffix, seg_map_suffix=seg_map_suffix, **kwargs) + + @staticmethod + def _convert_to_label_id(result): + """Convert trainId to id for cityscapes.""" + if isinstance(result, str): + result = np.load(result) + import cityscapesscripts.helpers.labels as CSLabels + result_copy = result.copy() + for trainId, label in CSLabels.trainId2label.items(): + result_copy[result == trainId] = label.id + + return result_copy + + def results2img(self, results, imgfile_prefix, to_label_id, indices=None): + """Write the segmentation results to images. + + Args: + results (list[ndarray]): Testing results of the + dataset. + imgfile_prefix (str): The filename prefix of the png files. + If the prefix is "somepath/xxx", + the png files will be named "somepath/xxx.png". + to_label_id (bool): whether convert output to label_id for + submission. + indices (list[int], optional): Indices of input results, + if not set, all the indices of the dataset will be used. + Default: None. + + Returns: + list[str: str]: result txt files which contains corresponding + semantic segmentation images. 
+ """ + if indices is None: + indices = list(range(len(self))) + + mmcv.mkdir_or_exist(imgfile_prefix) + result_files = [] + for result, idx in zip(results, indices): + if to_label_id: + result = self._convert_to_label_id(result) + filename = self.img_infos[idx]['filename'] + basename = osp.splitext(osp.basename(filename))[0] + + png_filename = osp.join(imgfile_prefix, f'{basename}.png') + + output = Image.fromarray(result.astype(np.uint8)).convert('P') + import cityscapesscripts.helpers.labels as CSLabels + palette = np.zeros((len(CSLabels.id2label), 3), dtype=np.uint8) + for label_id, label in CSLabels.id2label.items(): + palette[label_id] = label.color + + output.putpalette(palette) + output.save(png_filename) + result_files.append(png_filename) + + return result_files + + def format_results(self, + results, + imgfile_prefix, + to_label_id=True, + indices=None): + """Format the results into dir (standard format for Cityscapes + evaluation). + + Args: + results (list): Testing results of the dataset. + imgfile_prefix (str): The prefix of images files. It + includes the file path and the prefix of filename, e.g., + "a/b/prefix". + to_label_id (bool): whether convert output to label_id for + submission. Default: False + indices (list[int], optional): Indices of input results, + if not set, all the indices of the dataset will be used. + Default: None. + + Returns: + tuple: (result_files, tmp_dir), result_files is a list containing + the image paths, tmp_dir is the temporal directory created + for saving json/png files when img_prefix is not specified. + """ + if indices is None: + indices = list(range(len(self))) + + assert isinstance(results, list), 'results must be a list.' + assert isinstance(indices, list), 'indices must be a list.' + + result_files = self.results2img(results, imgfile_prefix, to_label_id, + indices) + + return result_files + + def evaluate(self, + results, + metric='mIoU', + logger=None, + imgfile_prefix=None): + """Evaluation in Cityscapes/default protocol. + + Args: + results (list): Testing results of the dataset. + metric (str | list[str]): Metrics to be evaluated. + logger (logging.Logger | None | str): Logger used for printing + related information during evaluation. Default: None. + imgfile_prefix (str | None): The prefix of output image file, + for cityscapes evaluation only. It includes the file path and + the prefix of filename, e.g., "a/b/prefix". + If results are evaluated with cityscapes protocol, it would be + the prefix of output png files. The output files would be + png images under folder "a/b/prefix/xxx.png", where "xxx" is + the image name of cityscapes. If not specified, a temp file + will be created for evaluation. + Default: None. + + Returns: + dict[str, float]: Cityscapes/default metrics. + """ + + eval_results = dict() + metrics = metric.copy() if isinstance(metric, list) else [metric] + if 'cityscapes' in metrics: + eval_results.update( + self._evaluate_cityscapes(results, logger, imgfile_prefix)) + metrics.remove('cityscapes') + if len(metrics) > 0: + eval_results.update( + super(CityscapesDataset, + self).evaluate(results, metrics, logger)) + + return eval_results + + def _evaluate_cityscapes(self, results, logger, imgfile_prefix): + """Evaluation in Cityscapes protocol. + + Args: + results (list): Testing results of the dataset. + logger (logging.Logger | str | None): Logger used for printing + related information during evaluation. Default: None. 
+            imgfile_prefix (str | None): The prefix of output image file
+
+        Returns:
+            dict[str: float]: Cityscapes evaluation results.
+        """
+        try:
+            import cityscapesscripts.evaluation.evalPixelLevelSemanticLabeling as CSEval  # noqa
+        except ImportError:
+            raise ImportError('Please run "pip install cityscapesscripts" to '
+                              'install cityscapesscripts first.')
+        msg = 'Evaluating in Cityscapes style'
+        if logger is None:
+            msg = '\n' + msg
+        print_log(msg, logger=logger)
+
+        result_dir = imgfile_prefix
+
+        eval_results = dict()
+        print_log(f'Evaluating results under {result_dir} ...', logger=logger)
+
+        CSEval.args.evalInstLevelScore = True
+        CSEval.args.predictionPath = osp.abspath(result_dir)
+        CSEval.args.evalPixelAccuracy = True
+        CSEval.args.JSONOutput = False
+
+        seg_map_list = []
+        pred_list = []
+
+        # when evaluating with official cityscapesscripts,
+        # **_gtFine_labelIds.png is used
+        for seg_map in mmcv.scandir(
+                self.ann_dir, 'gtFine_labelIds.png', recursive=True):
+            seg_map_list.append(osp.join(self.ann_dir, seg_map))
+            pred_list.append(CSEval.getPrediction(CSEval.args, seg_map))
+
+        eval_results.update(
+            CSEval.evaluateImgLists(pred_list, seg_map_list, CSEval.args))
+
+        return eval_results
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/coco_stuff.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/coco_stuff.py
new file mode 100644
index 0000000..24d0895
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/coco_stuff.py
@@ -0,0 +1,94 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .builder import DATASETS
+from .custom import CustomDataset
+
+
+@DATASETS.register_module()
+class COCOStuffDataset(CustomDataset):
+    """COCO-Stuff dataset.
+
+    In segmentation map annotation for COCO-Stuff, Train-IDs of the 10k
+    version are from 1 to 171, where 0 is the ignore index, and Train-IDs of
+    COCO-Stuff 164k are from 0 to 170, where 255 is the ignore index. Both
+    versions therefore share the same 171 semantic categories.
+    ``reduce_zero_label`` is set to True and False for the 10k and 164k
+    versions, respectively. The ``img_suffix`` is fixed to '.jpg', and
+    ``seg_map_suffix`` is fixed to '.png'.
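+
+    An illustrative (assumed) pair of config fragments for the two variants,
+    added editorially to make the ``reduce_zero_label`` convention concrete::
+
+        # 10k variant: labels start at 1, so label zero is dropped
+        dataset_10k = dict(type='COCOStuffDataset', reduce_zero_label=True)
+        # 164k variant: labels already start at 0
+        dataset_164k = dict(type='COCOStuffDataset', reduce_zero_label=False)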
+ """ + CLASSES = ( + 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', + 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', + 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', + 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', + 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', + 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', + 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', + 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', + 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', + 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', + 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', + 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', + 'scissors', 'teddy bear', 'hair drier', 'toothbrush', 'banner', + 'blanket', 'branch', 'bridge', 'building-other', 'bush', 'cabinet', + 'cage', 'cardboard', 'carpet', 'ceiling-other', 'ceiling-tile', + 'cloth', 'clothes', 'clouds', 'counter', 'cupboard', 'curtain', + 'desk-stuff', 'dirt', 'door-stuff', 'fence', 'floor-marble', + 'floor-other', 'floor-stone', 'floor-tile', 'floor-wood', + 'flower', 'fog', 'food-other', 'fruit', 'furniture-other', 'grass', + 'gravel', 'ground-other', 'hill', 'house', 'leaves', 'light', 'mat', + 'metal', 'mirror-stuff', 'moss', 'mountain', 'mud', 'napkin', 'net', + 'paper', 'pavement', 'pillow', 'plant-other', 'plastic', 'platform', + 'playingfield', 'railing', 'railroad', 'river', 'road', 'rock', 'roof', + 'rug', 'salad', 'sand', 'sea', 'shelf', 'sky-other', 'skyscraper', + 'snow', 'solid-other', 'stairs', 'stone', 'straw', 'structural-other', + 'table', 'tent', 'textile-other', 'towel', 'tree', 'vegetable', + 'wall-brick', 'wall-concrete', 'wall-other', 'wall-panel', + 'wall-stone', 'wall-tile', 'wall-wood', 'water-other', 'waterdrops', + 'window-blind', 'window-other', 'wood') + + PALETTE = [[0, 192, 64], [0, 192, 64], [0, 64, 96], [128, 192, 192], + [0, 64, 64], [0, 192, 224], [0, 192, 192], [128, 192, 64], + [0, 192, 96], [128, 192, 64], [128, 32, 192], [0, 0, 224], + [0, 0, 64], [0, 160, 192], [128, 0, 96], [128, 0, 192], + [0, 32, 192], [128, 128, 224], [0, 0, 192], [128, 160, 192], + [128, 128, 0], [128, 0, 32], [128, 32, 0], [128, 0, 128], + [64, 128, 32], [0, 160, 0], [0, 0, 0], [192, 128, 160], + [0, 32, 0], [0, 128, 128], [64, 128, 160], [128, 160, 0], + [0, 128, 0], [192, 128, 32], [128, 96, 128], [0, 0, 128], + [64, 0, 32], [0, 224, 128], [128, 0, 0], [192, 0, 160], + [0, 96, 128], [128, 128, 128], [64, 0, 160], [128, 224, 128], + [128, 128, 64], [192, 0, 32], [128, 96, 0], [128, 0, 192], + [0, 128, 32], [64, 224, 0], [0, 0, 64], [128, 128, 160], + [64, 96, 0], [0, 128, 192], [0, 128, 160], [192, 224, 0], + [0, 128, 64], [128, 128, 32], [192, 32, 128], [0, 64, 192], + [0, 0, 32], [64, 160, 128], [128, 64, 64], [128, 0, 160], + [64, 32, 128], [128, 192, 192], [0, 0, 160], [192, 160, 128], + [128, 192, 0], [128, 0, 96], [192, 32, 0], [128, 64, 128], + [64, 128, 96], [64, 160, 0], [0, 64, 0], [192, 128, 224], + [64, 32, 0], [0, 192, 128], [64, 128, 224], [192, 160, 0], + [0, 192, 0], [192, 128, 96], [192, 96, 128], [0, 64, 128], + [64, 0, 96], [64, 224, 128], [128, 64, 0], [192, 0, 224], + [64, 96, 128], [128, 192, 128], [64, 0, 224], [192, 224, 128], + [128, 192, 64], [192, 0, 96], [192, 96, 0], [128, 64, 192], + [0, 128, 96], [0, 224, 0], [64, 64, 64], [128, 128, 224], + [0, 96, 0], [64, 192, 192], [0, 128, 
224], [128, 224, 0],
+               [64, 192, 64], [128, 128, 96], [128, 32, 128], [64, 0, 192],
+               [0, 64, 96], [0, 160, 128], [192, 0, 64], [128, 64, 224],
+               [0, 32, 128], [192, 128, 192], [0, 64, 224], [128, 160, 128],
+               [192, 128, 0], [128, 64, 32], [128, 32, 64], [192, 0, 128],
+               [64, 192, 32], [0, 160, 64], [64, 0, 0], [192, 192, 160],
+               [0, 32, 64], [64, 128, 128], [64, 192, 160], [128, 160, 64],
+               [64, 128, 0], [192, 192, 32], [128, 96, 192], [64, 0, 128],
+               [64, 64, 32], [0, 224, 192], [192, 0, 0], [192, 64, 160],
+               [0, 96, 192], [192, 128, 128], [64, 64, 160], [128, 224, 192],
+               [192, 128, 64], [192, 64, 32], [128, 96, 64], [192, 0, 192],
+               [0, 192, 32], [64, 224, 64], [64, 0, 64], [128, 192, 160],
+               [64, 96, 64], [64, 128, 192], [0, 192, 160], [192, 224, 64],
+               [64, 128, 64], [128, 192, 32], [192, 32, 192], [64, 64, 192],
+               [0, 64, 32], [64, 160, 192], [192, 64, 64], [128, 64, 160],
+               [64, 32, 192], [192, 192, 192], [0, 64, 160], [192, 160, 192],
+               [192, 192, 0], [128, 64, 96], [192, 32, 64], [192, 64, 128],
+               [64, 192, 96], [64, 160, 64], [64, 64, 0]]
+
+    def __init__(self, **kwargs):
+        super(COCOStuffDataset, self).__init__(
+            img_suffix='.jpg', seg_map_suffix='_labelTrainIds.png', **kwargs)
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/custom.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/custom.py
new file mode 100644
index 0000000..f2f67aa
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/custom.py
@@ -0,0 +1,501 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import warnings
+from collections import OrderedDict
+
+import mmcv
+import numpy as np
+from mmcv.utils import print_log
+from prettytable import PrettyTable
+from torch.utils.data import Dataset
+
+from mmseg.core import eval_metrics, intersect_and_union, pre_eval_to_metrics
+from mmseg.utils import get_root_logger
+from .builder import DATASETS
+from .pipelines import Compose, LoadAnnotations
+
+
+@DATASETS.register_module()
+class CustomDataset(Dataset):
+    """Custom dataset for semantic segmentation. An example of the file
+    structure is as follows.
+
+    .. code-block:: none
+
+        ├── data
+        │   ├── my_dataset
+        │   │   ├── img_dir
+        │   │   │   ├── train
+        │   │   │   │   ├── xxx{img_suffix}
+        │   │   │   │   ├── yyy{img_suffix}
+        │   │   │   │   ├── zzz{img_suffix}
+        │   │   │   ├── val
+        │   │   ├── ann_dir
+        │   │   │   ├── train
+        │   │   │   │   ├── xxx{seg_map_suffix}
+        │   │   │   │   ├── yyy{seg_map_suffix}
+        │   │   │   │   ├── zzz{seg_map_suffix}
+        │   │   │   ├── val
+
+    The img/gt_semantic_seg pairs of CustomDataset should be identical except
+    for the suffix. A valid img/gt_semantic_seg filename pair should be like
+    ``xxx{img_suffix}`` and ``xxx{seg_map_suffix}`` (the extension is also
+    included in the suffix). If split is given, then ``xxx`` is specified in
+    the txt file. Otherwise, all files in ``img_dir/`` and ``ann_dir`` will be
+    loaded. Please refer to ``docs/en/tutorials/new_dataset.md`` for more
+    details.
+
+
+    Args:
+        pipeline (list[dict]): Processing pipeline
+        img_dir (str): Path to image directory
+        img_suffix (str): Suffix of images. Default: '.jpg'
+        ann_dir (str, optional): Path to annotation directory. Default: None
+        seg_map_suffix (str): Suffix of segmentation maps. Default: '.png'
+        split (str, optional): Split txt file. If split is specified, only
+            files with names listed in the split will be loaded. Otherwise,
+            all images in img_dir/ann_dir will be loaded. Default: None
+        data_root (str, optional): Data root for img_dir/ann_dir. Default:
+            None.
+        test_mode (bool): If test_mode=True, ground-truth annotations will
+            not be loaded.
+        ignore_index (int): The label index to be ignored. Default: 255
+        reduce_zero_label (bool): Whether to mark label zero as ignored.
+            Default: False
+        classes (str | Sequence[str], optional): Specify classes to load.
+            If it is None, ``cls.CLASSES`` will be used. Default: None.
+        palette (Sequence[Sequence[int]] | np.ndarray | None):
+            The palette of segmentation map. If None is given, and
+            self.PALETTE is None, a random palette will be generated.
+            Default: None
+        gt_seg_map_loader_cfg (dict, optional): build LoadAnnotations to
+            load gt for evaluation, load from disk by default. Default: None.
+        file_client_args (dict): Arguments to instantiate a FileClient.
+            See :class:`mmcv.fileio.FileClient` for details.
+            Defaults to ``dict(backend='disk')``.
+    """
+
+    CLASSES = ('background', 'black', 'brown', 'rainbow', 'silver')
+    # black, cobalt blue, brown, yellow, purple
+    PALETTE = [[0, 0, 0], [0, 0, 204], [180, 180, 180], [255, 255, 0],
+               [178, 102, 255]]
+    # PALETTE = [[0, 0, 0], [1, 1, 1], [2, 2, 2], [3, 3, 3], [4, 4, 4]]
+
+    # CLASSES = ('background', 'black', 'silver')
+    # PALETTE = [[0, 0, 0], [1, 1, 1], [2, 2, 2]]
+    # black, cobalt blue, purple
+    # PALETTE = [[0, 0, 0], [0, 0, 204], [178, 102, 255]]
+
+    # Inference classes for the black-only model
+    # CLASSES = ('background', 'black')
+    # black, purple
+    # PALETTE = [[0, 0, 0], [178, 102, 255]]
+
+    def __init__(self,
+                 pipeline,
+                 img_dir,
+                 img_suffix='.png',  # Need to change
+                 ann_dir=None,
+                 seg_map_suffix='.png',  # Need to change
+                 split=None,
+                 data_root=None,
+                 test_mode=False,
+                 ignore_index=255,
+                 reduce_zero_label=False,  # drops label 0 (background) when True
+                 # reduce_zero_label=True,
+                 classes=None,
+                 palette=None,
+                 gt_seg_map_loader_cfg=None,
+                 file_client_args=dict(backend='disk')):
+        self.pipeline = Compose(pipeline)
+        self.img_dir = img_dir
+        self.img_suffix = img_suffix
+        self.ann_dir = ann_dir
+        self.seg_map_suffix = seg_map_suffix
+        self.split = split
+        self.data_root = data_root
+        self.test_mode = test_mode
+        self.ignore_index = ignore_index
+        self.reduce_zero_label = reduce_zero_label
+        self.label_map = None
+        self.CLASSES, self.PALETTE = self.get_classes_and_palette(
+            classes, palette)
+        self.gt_seg_map_loader = LoadAnnotations(
+        ) if gt_seg_map_loader_cfg is None else LoadAnnotations(
+            **gt_seg_map_loader_cfg)
+
+        self.file_client_args = file_client_args
+        self.file_client = mmcv.FileClient.infer_client(self.file_client_args)
+
+        if test_mode:
+            assert self.CLASSES is not None, \
+                '`cls.CLASSES` or `classes` should be specified when testing'
+
+        # join paths if data_root is specified
+        if self.data_root is not None:
+            if not osp.isabs(self.img_dir):
+                self.img_dir = osp.join(self.data_root, self.img_dir)
+            if not (self.ann_dir is None or osp.isabs(self.ann_dir)):
+                self.ann_dir = osp.join(self.data_root, self.ann_dir)
+            if not (self.split is None or osp.isabs(self.split)):
+                self.split = osp.join(self.data_root, self.split)
+
+        # load annotations
+        self.img_infos = self.load_annotations(self.img_dir, self.img_suffix,
+                                               self.ann_dir,
+                                               self.seg_map_suffix, self.split)
+
+    def __len__(self):
+        """Total number of samples of data."""
+        return len(self.img_infos)
+
+    def load_annotations(self, img_dir, img_suffix, ann_dir, seg_map_suffix,
+                         split):
+        """Load annotation from directory.
+
+        Args:
+            img_dir (str): Path to image directory
+            img_suffix (str): Suffix of images.
+            ann_dir (str|None): Path to annotation directory.
+            seg_map_suffix (str|None): Suffix of segmentation maps.
+ split (str|None): Split txt file. If split is specified, only file + with suffix in the splits will be loaded. Otherwise, all images + in img_dir/ann_dir will be loaded. Default: None + + Returns: + list[dict]: All image info of dataset. + """ + + img_infos = [] + if split is not None: + lines = mmcv.list_from_file( + split, file_client_args=self.file_client_args) + for line in lines: + img_name = line.strip() + img_info = dict(filename=img_name + img_suffix) + if ann_dir is not None: + seg_map = img_name + seg_map_suffix + img_info['ann'] = dict(seg_map=seg_map) + img_infos.append(img_info) + else: + for img in self.file_client.list_dir_or_file( + dir_path=img_dir, + list_dir=False, + suffix=img_suffix, + recursive=True): + img_info = dict(filename=img) + if ann_dir is not None: + seg_map = img.replace(img_suffix, seg_map_suffix) + img_info['ann'] = dict(seg_map=seg_map) + img_infos.append(img_info) + img_infos = sorted(img_infos, key=lambda x: x['filename']) + + print_log(f'Loaded {len(img_infos)} images', logger=get_root_logger()) + return img_infos + + def get_ann_info(self, idx): + """Get annotation by index. + + Args: + idx (int): Index of data. + + Returns: + dict: Annotation info of specified index. + """ + + return self.img_infos[idx]['ann'] + + def pre_pipeline(self, results): + """Prepare results dict for pipeline.""" + results['seg_fields'] = [] + results['img_prefix'] = self.img_dir + results['seg_prefix'] = self.ann_dir + if self.custom_classes: + results['label_map'] = self.label_map + + def __getitem__(self, idx): + """Get training/test data after pipeline. + + Args: + idx (int): Index of data. + + Returns: + dict: Training/test data (with annotation if `test_mode` is set + False). + """ + + if self.test_mode: + return self.prepare_test_img(idx) + else: + return self.prepare_train_img(idx) + + def prepare_train_img(self, idx): + """Get training data and annotations after pipeline. + + Args: + idx (int): Index of data. + + Returns: + dict: Training data and annotation after pipeline with new keys + introduced by pipeline. + """ + + img_info = self.img_infos[idx] + ann_info = self.get_ann_info(idx) + results = dict(img_info=img_info, ann_info=ann_info) + self.pre_pipeline(results) + return self.pipeline(results) + + def prepare_test_img(self, idx): + """Get testing data after pipeline. + + Args: + idx (int): Index of data. + + Returns: + dict: Testing data after pipeline with new keys introduced by + pipeline. + """ + + img_info = self.img_infos[idx] + results = dict(img_info=img_info) + self.pre_pipeline(results) + return self.pipeline(results) + + def format_results(self, results, imgfile_prefix, indices=None, **kwargs): + """Place holder to format result to dataset specific output.""" + raise NotImplementedError + + def get_gt_seg_map_by_idx(self, index): + """Get one ground truth segmentation map for evaluation.""" + ann_info = self.get_ann_info(index) + results = dict(ann_info=ann_info) + self.pre_pipeline(results) + self.gt_seg_map_loader(results) + return results['gt_semantic_seg'] + + def get_gt_seg_maps(self, efficient_test=None): + """Get ground truth segmentation maps for evaluation.""" + if efficient_test is not None: + warnings.warn( + 'DeprecationWarning: ``efficient_test`` has been deprecated ' + 'since MMSeg v0.16, the ``get_gt_seg_maps()`` is CPU memory ' + 'friendly by default. 
')
+
+        for idx in range(len(self)):
+            ann_info = self.get_ann_info(idx)
+            results = dict(ann_info=ann_info)
+            self.pre_pipeline(results)
+            self.gt_seg_map_loader(results)
+            yield results['gt_semantic_seg']
+
+    def pre_eval(self, preds, indices):
+        """Collect eval result from each iteration.
+
+        Args:
+            preds (list[torch.Tensor] | torch.Tensor): the segmentation logit
+                after argmax, shape (N, H, W).
+            indices (list[int] | int): the prediction related ground truth
+                indices.
+
+        Returns:
+            list[torch.Tensor]: (area_intersect, area_union, area_prediction,
+                area_ground_truth).
+        """
+        # In order to compat with batch inference
+        if not isinstance(indices, list):
+            indices = [indices]
+        if not isinstance(preds, list):
+            preds = [preds]
+
+        pre_eval_results = []
+
+        for pred, index in zip(preds, indices):
+            seg_map = self.get_gt_seg_map_by_idx(index)
+            pre_eval_results.append(
+                intersect_and_union(
+                    pred,
+                    seg_map,
+                    len(self.CLASSES),
+                    self.ignore_index,
+                    # as the labels have been converted when the dataset was
+                    # initialized in `get_palette_for_custom_classes`, this
+                    # `label_map` should be `dict()`; see
+                    # https://github.com/open-mmlab/mmsegmentation/issues/1415
+                    # for more details
+                    label_map=dict(),
+                    reduce_zero_label=self.reduce_zero_label))
+
+        return pre_eval_results
+
+    def get_classes_and_palette(self, classes=None, palette=None):
+        """Get class names of current dataset.
+
+        Args:
+            classes (Sequence[str] | str | None): If classes is None, use
+                default CLASSES defined by builtin dataset. If classes is a
+                string, take it as a file name. The file contains the name of
+                classes where each line contains one class name. If classes is
+                a tuple or list, override the CLASSES defined by the dataset.
+            palette (Sequence[Sequence[int]] | np.ndarray | None):
+                The palette of segmentation map. If None is given, a random
+                palette will be generated. Default: None
+        """
+        if classes is None:
+            self.custom_classes = False
+            return self.CLASSES, self.PALETTE
+
+        self.custom_classes = True
+        if isinstance(classes, str):
+            # take it as a file path
+            class_names = mmcv.list_from_file(classes)
+        elif isinstance(classes, (tuple, list)):
+            class_names = classes
+        else:
+            raise ValueError(f'Unsupported type {type(classes)} of classes.')
+
+        if self.CLASSES:
+            if not set(class_names).issubset(self.CLASSES):
+                raise ValueError('classes is not a subset of CLASSES.')
+
+            # dictionary, its keys are the old label ids and its values
+            # are the new label ids.
+            # used for changing pixel labels in load_annotations.
+            self.label_map = {}
+            for i, c in enumerate(self.CLASSES):
+                if c not in class_names:
+                    self.label_map[i] = -1
+                else:
+                    self.label_map[i] = class_names.index(c)
+
+        palette = self.get_palette_for_custom_classes(class_names, palette)
+
+        return class_names, palette
+
+    def get_palette_for_custom_classes(self, class_names, palette=None):
+
+        if self.label_map is not None:
+            # return subset of palette
+            palette = []
+            for old_id, new_id in sorted(
+                    self.label_map.items(), key=lambda x: x[1]):
+                if new_id != -1:
+                    palette.append(self.PALETTE[old_id])
+            palette = type(self.PALETTE)(palette)
+
+        elif palette is None:
+            if self.PALETTE is None:
+                # Get random state before set seed, and restore
+                # random state later.
+                # It will prevent loss of randomness, as the palette
+                # may be different in each iteration if not specified.
+ # See: https://github.com/open-mmlab/mmdetection/issues/5844 + state = np.random.get_state() + np.random.seed(42) + # random palette + palette = np.random.randint(0, 255, size=(len(class_names), 3)) + np.random.set_state(state) + else: + palette = self.PALETTE + + return palette + + def evaluate(self, + results, + metric='mIoU', + logger=None, + gt_seg_maps=None, + **kwargs): + """Evaluate the dataset. + + Args: + results (list[tuple[torch.Tensor]] | list[str]): per image pre_eval + results or predict segmentation map for computing evaluation + metric. + metric (str | list[str]): Metrics to be evaluated. 'mIoU', + 'mDice' and 'mFscore' are supported. + logger (logging.Logger | None | str): Logger used for printing + related information during evaluation. Default: None. + gt_seg_maps (generator[ndarray]): Custom gt seg maps as input, + used in ConcatDataset + + Returns: + dict[str, float]: Default metrics. + """ + if isinstance(metric, str): + metric = [metric] + allowed_metrics = ['mIoU', 'mDice', 'mFscore'] + if not set(metric).issubset(set(allowed_metrics)): + raise KeyError('metric {} is not supported'.format(metric)) + + eval_results = {} + # test a list of files + if mmcv.is_list_of(results, np.ndarray) or mmcv.is_list_of( + results, str): + if gt_seg_maps is None: + gt_seg_maps = self.get_gt_seg_maps() + num_classes = len(self.CLASSES) + ret_metrics = eval_metrics( + results, + gt_seg_maps, + num_classes, + self.ignore_index, + metric, + label_map=dict(), + reduce_zero_label=self.reduce_zero_label) + # test a list of pre_eval_results + else: + ret_metrics = pre_eval_to_metrics(results, metric) + + # Because dataset.CLASSES is required for per-eval. + if self.CLASSES is None: + class_names = tuple(range(num_classes)) + else: + class_names = self.CLASSES + + # summary table + ret_metrics_summary = OrderedDict({ + ret_metric: np.round(np.nanmean(ret_metric_value) * 100, 2) + for ret_metric, ret_metric_value in ret_metrics.items() + }) + + # each class table + ret_metrics.pop('aAcc', None) + ret_metrics_class = OrderedDict({ + ret_metric: np.round(ret_metric_value * 100, 2) + for ret_metric, ret_metric_value in ret_metrics.items() + }) + ret_metrics_class.update({'Class': class_names}) + ret_metrics_class.move_to_end('Class', last=False) + + # for logger + class_table_data = PrettyTable() + for key, val in ret_metrics_class.items(): + class_table_data.add_column(key, val) + + summary_table_data = PrettyTable() + for key, val in ret_metrics_summary.items(): + if key == 'aAcc': + summary_table_data.add_column(key, [val]) + else: + summary_table_data.add_column('m' + key, [val]) + + print_log('per class results:', logger) + print_log('\n' + class_table_data.get_string(), logger=logger) + print_log('Summary:', logger) + print_log('\n' + summary_table_data.get_string(), logger=logger) + + # each metric dict + for key, value in ret_metrics_summary.items(): + if key == 'aAcc': + eval_results[key] = value / 100.0 + else: + eval_results['m' + key] = value / 100.0 + + ret_metrics_class.pop('Class', None) + for key, value in ret_metrics_class.items(): + eval_results.update({ + key + '.' 
+ str(name): value[idx] / 100.0
+                for idx, name in enumerate(class_names)
+            })
+
+        return eval_results
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/dark_zurich.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/dark_zurich.py
new file mode 100644
index 0000000..0b6fda6
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/dark_zurich.py
@@ -0,0 +1,14 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .builder import DATASETS
+from .cityscapes import CityscapesDataset
+
+
+@DATASETS.register_module()
+class DarkZurichDataset(CityscapesDataset):
+    """DarkZurichDataset dataset."""
+
+    def __init__(self, **kwargs):
+        super().__init__(
+            img_suffix='_rgb_anon.png',
+            seg_map_suffix='_gt_labelTrainIds.png',
+            **kwargs)
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/dataset_wrappers.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/dataset_wrappers.py
new file mode 100644
index 0000000..1fb089f
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/dataset_wrappers.py
@@ -0,0 +1,277 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import bisect
+import collections
+import copy
+from itertools import chain
+
+import mmcv
+import numpy as np
+from mmcv.utils import build_from_cfg, print_log
+from torch.utils.data.dataset import ConcatDataset as _ConcatDataset
+
+from .builder import DATASETS, PIPELINES
+from .cityscapes import CityscapesDataset
+
+
+@DATASETS.register_module()
+class ConcatDataset(_ConcatDataset):
+    """A wrapper of concatenated dataset.
+
+    Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but supports
+    evaluation and formatting of results.
+
+    Args:
+        datasets (list[:obj:`Dataset`]): A list of datasets.
+        separate_eval (bool): Whether to evaluate the concatenated
+            dataset results separately. Defaults to True.
+    """
+
+    def __init__(self, datasets, separate_eval=True):
+        super(ConcatDataset, self).__init__(datasets)
+        self.CLASSES = datasets[0].CLASSES
+        self.PALETTE = datasets[0].PALETTE
+        self.separate_eval = separate_eval
+        assert separate_eval in [True, False], \
+            f'separate_eval can only be True or False, ' \
+            f'but got {separate_eval}'
+        if any([isinstance(ds, CityscapesDataset) for ds in datasets]):
+            raise NotImplementedError(
+                'Evaluating ConcatDataset containing CityscapesDataset '
+                'is not supported!')
+
+    def evaluate(self, results, logger=None, **kwargs):
+        """Evaluate the results.
+
+        Args:
+            results (list[tuple[torch.Tensor]] | list[str]): per image
+                pre_eval results or predicted segmentation maps for
+                computing the evaluation metric.
+            logger (logging.Logger | str | None): Logger used for printing
+                related information during evaluation. Default: None.
+
+        Returns:
+            dict[str: float]: evaluation results of the whole dataset, or of
+                each separate dataset if ``self.separate_eval=True``.
+        """
+        assert len(results) == self.cumulative_sizes[-1], \
+            ('Dataset and results have different sizes: '
+             f'{self.cumulative_sizes[-1]} v.s. {len(results)}')
+
+        # Check whether all the datasets support evaluation
+        for dataset in self.datasets:
+            assert hasattr(dataset, 'evaluate'), \
+                f'{type(dataset)} does not implement evaluate function'
+
+        if self.separate_eval:
+            dataset_idx = -1
+            total_eval_results = dict()
+            for size, dataset in zip(self.cumulative_sizes, self.datasets):
+                start_idx = 0 if dataset_idx == -1 else \
+                    self.cumulative_sizes[dataset_idx]
+                end_idx = self.cumulative_sizes[dataset_idx + 1]
+
+                results_per_dataset = results[start_idx:end_idx]
+                print_log(
+                    f'\nEvaluating {dataset.img_dir} with '
+                    f'{len(results_per_dataset)} images now',
+                    logger=logger)
+
+                eval_results_per_dataset = dataset.evaluate(
+                    results_per_dataset, logger=logger, **kwargs)
+                dataset_idx += 1
+                for k, v in eval_results_per_dataset.items():
+                    total_eval_results.update({f'{dataset_idx}_{k}': v})
+
+            return total_eval_results
+
+        if len(set([type(ds) for ds in self.datasets])) != 1:
+            raise NotImplementedError(
+                'All the datasets should have same types when '
+                'self.separate_eval=False')
+        else:
+            if mmcv.is_list_of(results, np.ndarray) or mmcv.is_list_of(
+                    results, str):
+                # merge the generators of gt_seg_maps
+                gt_seg_maps = chain(
+                    *[dataset.get_gt_seg_maps() for dataset in self.datasets])
+            else:
+                # if the results are `pre_eval` results,
+                # we do not need gt_seg_maps to evaluate
+                gt_seg_maps = None
+            eval_results = self.datasets[0].evaluate(
+                results, gt_seg_maps=gt_seg_maps, logger=logger, **kwargs)
+            return eval_results
+
+    def get_dataset_idx_and_sample_idx(self, indice):
+        """Return the dataset index and the sample index for a given index of
+        the ConcatDataset.
+
+        Args:
+            indice (int): index of a sample in the ConcatDataset
+
+        Returns:
+            int: the index of the sub dataset the sample belongs to
+            int: the index of the sample in its corresponding subset
+        """
+        if indice < 0:
+            if -indice > len(self):
+                raise ValueError(
+                    'absolute value of index should not exceed dataset length')
+            indice = len(self) + indice
+        dataset_idx = bisect.bisect_right(self.cumulative_sizes, indice)
+        if dataset_idx == 0:
+            sample_idx = indice
+        else:
+            sample_idx = indice - self.cumulative_sizes[dataset_idx - 1]
+        return dataset_idx, sample_idx
+
+    def format_results(self, results, imgfile_prefix, indices=None, **kwargs):
+        """format result for every sample of ConcatDataset."""
+        if indices is None:
+            indices = list(range(len(self)))
+
+        assert isinstance(results, list), 'results must be a list.'
+        assert isinstance(indices, list), 'indices must be a list.'
+
+        ret_res = []
+        for i, indice in enumerate(indices):
+            dataset_idx, sample_idx = self.get_dataset_idx_and_sample_idx(
+                indice)
+            res = self.datasets[dataset_idx].format_results(
+                [results[i]],
+                imgfile_prefix + f'/{dataset_idx}',
+                indices=[sample_idx],
+                **kwargs)
+            ret_res.append(res)
+        return sum(ret_res, [])
+
+    def pre_eval(self, preds, indices):
+        """do pre eval for every sample of ConcatDataset."""
+        # In order to compat with batch inference
+        if not isinstance(indices, list):
+            indices = [indices]
+        if not isinstance(preds, list):
+            preds = [preds]
+        ret_res = []
+        for i, indice in enumerate(indices):
+            dataset_idx, sample_idx = self.get_dataset_idx_and_sample_idx(
+                indice)
+            res = self.datasets[dataset_idx].pre_eval(preds[i], sample_idx)
+            ret_res.append(res)
+        return sum(ret_res, [])
+
+
+@DATASETS.register_module()
+class RepeatDataset(object):
+    """A wrapper of repeated dataset.
+
+    The length of repeated dataset will be `times` larger than the original
+    dataset.
+    This is useful when the data loading time is long but the dataset
+    is small. Using RepeatDataset can reduce the data loading time between
+    epochs.
+
+    Args:
+        dataset (:obj:`Dataset`): The dataset to be repeated.
+        times (int): Repeat times.
+    """
+
+    def __init__(self, dataset, times):
+        self.dataset = dataset
+        self.times = times
+        self.CLASSES = dataset.CLASSES
+        self.PALETTE = dataset.PALETTE
+        self._ori_len = len(self.dataset)
+
+    def __getitem__(self, idx):
+        """Get item from original dataset."""
+        return self.dataset[idx % self._ori_len]
+
+    def __len__(self):
+        """The length is multiplied by ``times``"""
+        return self.times * self._ori_len
+
+
+@DATASETS.register_module()
+class MultiImageMixDataset:
+    """A wrapper of multiple images mixed dataset.
+
+    Suitable for training with multi-image mixed data augmentations such as
+    Mosaic and MixUp. For the augmentation pipeline of mixed image data, the
+    `get_indexes` method needs to be provided to obtain the image indexes,
+    and you can set `skip_type_keys` to change the pipeline running process.
+
+
+    Args:
+        dataset (:obj:`CustomDataset`): The dataset to be mixed.
+        pipeline (Sequence[dict]): Sequence of transform objects or
+            config dicts to be composed.
+        skip_type_keys (list[str], optional): Sequence of transform type
+            strings to be skipped in the pipeline. Default: None.
+    """
+
+    def __init__(self, dataset, pipeline, skip_type_keys=None):
+        assert isinstance(pipeline, collections.abc.Sequence)
+        if skip_type_keys is not None:
+            assert all([
+                isinstance(skip_type_key, str)
+                for skip_type_key in skip_type_keys
+            ])
+        self._skip_type_keys = skip_type_keys
+
+        self.pipeline = []
+        self.pipeline_types = []
+        for transform in pipeline:
+            if isinstance(transform, dict):
+                self.pipeline_types.append(transform['type'])
+                transform = build_from_cfg(transform, PIPELINES)
+                self.pipeline.append(transform)
+            else:
+                raise TypeError('pipeline must be a dict')
+
+        self.dataset = dataset
+        self.CLASSES = dataset.CLASSES
+        self.PALETTE = dataset.PALETTE
+        self.num_samples = len(dataset)
+
+    def __len__(self):
+        return self.num_samples
+
+    def __getitem__(self, idx):
+        results = copy.deepcopy(self.dataset[idx])
+        for (transform, transform_type) in zip(self.pipeline,
+                                               self.pipeline_types):
+            if self._skip_type_keys is not None and \
+                    transform_type in self._skip_type_keys:
+                continue
+
+            if hasattr(transform, 'get_indexes'):
+                indexes = transform.get_indexes(self.dataset)
+                if not isinstance(indexes, collections.abc.Sequence):
+                    indexes = [indexes]
+                mix_results = [
+                    copy.deepcopy(self.dataset[index]) for index in indexes
+                ]
+                results['mix_results'] = mix_results
+
+            results = transform(results)
+
+            if 'mix_results' in results:
+                results.pop('mix_results')
+
+        return results
+
+    def update_skip_type_keys(self, skip_type_keys):
+        """Update skip_type_keys.
+
+        It is called by an external hook.
+
+        Args:
+            skip_type_keys (list[str], optional): Sequence of transform type
+                strings to be skipped in the pipeline.
+        """
+        assert all([
+            isinstance(skip_type_key, str) for skip_type_key in skip_type_keys
+        ])
+        self._skip_type_keys = skip_type_keys
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/drive.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/drive.py
new file mode 100644
index 0000000..d44fb0d
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/drive.py
@@ -0,0 +1,27 @@
+# Copyright (c) OpenMMLab. All rights reserved.
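+# Editor's note: an illustrative (assumed) config fragment showing how the
+# wrappers from dataset_wrappers.py combine with a concrete dataset such as
+# the one defined below, e.g. repeating a small dataset to amortize loading:
+#
+#     data = dict(
+#         train=dict(
+#             type='RepeatDataset',
+#             times=40000,
+#             dataset=dict(type='DRIVEDataset', ...)))  # '...' stands for
+#                                                       # the usual dataset args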
+
+from .builder import DATASETS
+from .custom import CustomDataset
+
+
+@DATASETS.register_module()
+class DRIVEDataset(CustomDataset):
+    """DRIVE dataset.
+
+    In segmentation map annotation for DRIVE, 0 stands for background, which
+    is included in 2 categories. ``reduce_zero_label`` is fixed to False. The
+    ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to
+    '_manual1.png'.
+    """
+
+    CLASSES = ('background', 'vessel')
+
+    PALETTE = [[120, 120, 120], [6, 230, 230]]
+
+    def __init__(self, **kwargs):
+        super(DRIVEDataset, self).__init__(
+            img_suffix='.png',
+            seg_map_suffix='_manual1.png',
+            reduce_zero_label=False,
+            **kwargs)
+        assert self.file_client.exists(self.img_dir)
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/hrf.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/hrf.py
new file mode 100644
index 0000000..cf3ea8d
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/hrf.py
@@ -0,0 +1,27 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+
+from .builder import DATASETS
+from .custom import CustomDataset
+
+
+@DATASETS.register_module()
+class HRFDataset(CustomDataset):
+    """HRF dataset.
+
+    In segmentation map annotation for HRF, 0 stands for background, which is
+    included in 2 categories. ``reduce_zero_label`` is fixed to False. The
+    ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to
+    '.png'.
+    """
+
+    CLASSES = ('background', 'vessel')
+
+    PALETTE = [[120, 120, 120], [6, 230, 230]]
+
+    def __init__(self, **kwargs):
+        super(HRFDataset, self).__init__(
+            img_suffix='.png',
+            seg_map_suffix='.png',
+            reduce_zero_label=False,
+            **kwargs)
+        assert self.file_client.exists(self.img_dir)
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/isaid.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/isaid.py
new file mode 100644
index 0000000..db24f93
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/isaid.py
@@ -0,0 +1,82 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+
+import mmcv
+from mmcv.utils import print_log
+
+from ..utils import get_root_logger
+from .builder import DATASETS
+from .custom import CustomDataset
+
+
+@DATASETS.register_module()
+class iSAIDDataset(CustomDataset):
+    """iSAID: A Large-scale Dataset for Instance Segmentation in Aerial
+    Images.
+
+    Segmentation map annotations for iSAID cover 16 categories.
+    ``reduce_zero_label`` is fixed to False. The ``img_suffix`` is fixed to
+    '.png' and ``seg_map_suffix`` is fixed to '_instance_color_RGB.png'.
+    """
+
+    CLASSES = ('background', 'ship', 'store_tank', 'baseball_diamond',
+               'tennis_court', 'basketball_court', 'Ground_Track_Field',
+               'Bridge', 'Large_Vehicle', 'Small_Vehicle', 'Helicopter',
+               'Swimming_pool', 'Roundabout', 'Soccer_ball_field', 'plane',
+               'Harbor')
+
+    PALETTE = [[0, 0, 0], [0, 0, 63], [0, 63, 63], [0, 63, 0], [0, 63, 127],
+               [0, 63, 191], [0, 63, 255], [0, 127, 63], [0, 127, 127],
+               [0, 0, 127], [0, 0, 191], [0, 0, 255], [0, 191, 127],
+               [0, 127, 191], [0, 127, 255], [0, 100, 155]]
+
+    def __init__(self, **kwargs):
+        super(iSAIDDataset, self).__init__(
+            img_suffix='.png',
+            seg_map_suffix='.png',
+            ignore_index=255,
+            **kwargs)
+        assert self.file_client.exists(self.img_dir)
+
+    def load_annotations(self,
+                         img_dir,
+                         img_suffix,
+                         ann_dir,
+                         seg_map_suffix=None,
+                         split=None):
+        """Load annotation from directory.
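+
+        Editorial note: based on the parsing below, the split file is a plain
+        text file listing one bare image name per line; an illustrative
+        sketch (names hypothetical)::
+
+            P0001_0_0
+            P0001_0_512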
+ + Args: + img_dir (str): Path to image directory. + img_suffix (str): Suffix of images. + ann_dir (str|None): Path to annotation directory. + seg_map_suffix (str|None): Suffix of segmentation maps. + split (str|None): Split txt file. If split is specified, only files + listed in the split will be loaded. Otherwise, all images + in img_dir/ann_dir will be loaded. Default: None + + Returns: + list[dict]: All image info of dataset. + """ + + img_infos = [] + if split is not None: + with open(split) as f: + for line in f: + name = line.strip() + img_info = dict(filename=name + img_suffix) + if ann_dir is not None: + ann_name = name + '_instance_color_RGB' + seg_map = ann_name + seg_map_suffix + img_info['ann'] = dict(seg_map=seg_map) + img_infos.append(img_info) + else: + for img in mmcv.scandir(img_dir, img_suffix, recursive=True): + img_info = dict(filename=img) + if ann_dir is not None: + seg_img = img + seg_map = seg_img.replace( + img_suffix, '_instance_color_RGB' + seg_map_suffix) + img_info['ann'] = dict(seg_map=seg_map) + img_infos.append(img_info) + + print_log(f'Loaded {len(img_infos)} images', logger=get_root_logger()) + return img_infos diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/isprs.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/isprs.py new file mode 100644 index 0000000..5f23e1a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/isprs.py @@ -0,0 +1,25 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class ISPRSDataset(CustomDataset): + """ISPRS dataset. + + In segmentation map annotation for ISPRS, 0 is the ignore index. + ``reduce_zero_label`` should be set to True. The ``img_suffix`` and + ``seg_map_suffix`` are both fixed to '.png'. + """ + CLASSES = ('impervious_surface', 'building', 'low_vegetation', 'tree', + 'car', 'clutter') + + PALETTE = [[255, 255, 255], [0, 0, 255], [0, 255, 255], [0, 255, 0], + [255, 255, 0], [255, 0, 0]] + + def __init__(self, **kwargs): + super(ISPRSDataset, self).__init__( + img_suffix='.png', + seg_map_suffix='.png', + reduce_zero_label=True, + **kwargs) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/loveda.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/loveda.py new file mode 100644 index 0000000..90d654f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/loveda.py @@ -0,0 +1,92 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp + +import mmcv +import numpy as np +from PIL import Image + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class LoveDADataset(CustomDataset): + """LoveDA dataset. + + In segmentation map annotation for LoveDA, 0 is the ignore index. + ``reduce_zero_label`` should be set to True. The ``img_suffix`` and + ``seg_map_suffix`` are both fixed to '.png'. + """ + CLASSES = ('background', 'building', 'road', 'water', 'barren', 'forest', + 'agricultural') + + PALETTE = [[255, 255, 255], [255, 0, 0], [255, 255, 0], [0, 0, 255], + [159, 129, 183], [0, 255, 0], [255, 195, 128]] + + def __init__(self, **kwargs): + super(LoveDADataset, self).__init__( + img_suffix='.png', + seg_map_suffix='.png', + reduce_zero_label=True, + **kwargs) + + def results2img(self, results, imgfile_prefix, indices=None): + """Write the segmentation results to images.
+ + Args: + results (list[ndarray]): Testing results of the + dataset. + imgfile_prefix (str): The filename prefix of the png files. + If the prefix is "somepath/xxx", + the png files will be named "somepath/xxx.png". + indices (list[int], optional): Indices of input results, if not + set, all the indices of the dataset will be used. + Default: None. + + Returns: + list[str]: Result png files which contain the corresponding + semantic segmentation images. + """ + + mmcv.mkdir_or_exist(imgfile_prefix) + result_files = [] + for result, idx in zip(results, indices): + + filename = self.img_infos[idx]['filename'] + basename = osp.splitext(osp.basename(filename))[0] + + png_filename = osp.join(imgfile_prefix, f'{basename}.png') + + # The index range of official requirement is from 0 to 6. + output = Image.fromarray(result.astype(np.uint8)) + output.save(png_filename) + result_files.append(png_filename) + + return result_files + + def format_results(self, results, imgfile_prefix, indices=None): + """Format the results into dir (standard format for LoveDA evaluation). + + Args: + results (list): Testing results of the dataset. + imgfile_prefix (str): The prefix of image files. It + includes the file path and the prefix of filename, e.g., + "a/b/prefix". + indices (list[int], optional): Indices of input results, + if not set, all the indices of the dataset will be used. + Default: None. + + Returns: + list[str]: result_files, a list containing the paths of the + written png files. + """ + if indices is None: + indices = list(range(len(self))) + + assert isinstance(results, list), 'results must be a list.' + assert isinstance(indices, list), 'indices must be a list.' + + result_files = self.results2img(results, imgfile_prefix, indices) + + return result_files diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/night_driving.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/night_driving.py new file mode 100644 index 0000000..6620586 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/night_driving.py @@ -0,0 +1,14 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .builder import DATASETS +from .cityscapes import CityscapesDataset + + +@DATASETS.register_module() +class NightDrivingDataset(CityscapesDataset): + """NightDriving dataset.""" + + def __init__(self, **kwargs): + super().__init__( + img_suffix='_leftImg8bit.png', + seg_map_suffix='_gtCoarse_labelTrainIds.png', + **kwargs) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/pascal_context.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/pascal_context.py new file mode 100644 index 0000000..efacee0 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/pascal_context.py @@ -0,0 +1,103 @@ +# Copyright (c) OpenMMLab. All rights reserved. + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class PascalContextDataset(CustomDataset): + """PascalContext dataset. + + In segmentation map annotation for PascalContext, 0 stands for background, + which is included in 60 categories. ``reduce_zero_label`` is fixed to + False. The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is + fixed to '.png'. + + Args: + split (str): Split txt file for PascalContext.
+ """ + + CLASSES = ('background', 'aeroplane', 'bag', 'bed', 'bedclothes', 'bench', + 'bicycle', 'bird', 'boat', 'book', 'bottle', 'building', 'bus', + 'cabinet', 'car', 'cat', 'ceiling', 'chair', 'cloth', + 'computer', 'cow', 'cup', 'curtain', 'dog', 'door', 'fence', + 'floor', 'flower', 'food', 'grass', 'ground', 'horse', + 'keyboard', 'light', 'motorbike', 'mountain', 'mouse', 'person', + 'plate', 'platform', 'pottedplant', 'road', 'rock', 'sheep', + 'shelves', 'sidewalk', 'sign', 'sky', 'snow', 'sofa', 'table', + 'track', 'train', 'tree', 'truck', 'tvmonitor', 'wall', 'water', + 'window', 'wood') + + PALETTE = [[120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50], + [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255], + [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7], + [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82], + [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3], + [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255], + [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220], + [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224], + [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255], + [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7], + [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153], + [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255], + [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0], + [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255], + [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255]] + + def __init__(self, split, **kwargs): + super(PascalContextDataset, self).__init__( + img_suffix='.jpg', + seg_map_suffix='.png', + split=split, + reduce_zero_label=False, + **kwargs) + assert self.file_client.exists(self.img_dir) and self.split is not None + + +@DATASETS.register_module() +class PascalContextDataset59(CustomDataset): + """PascalContext dataset. + + In segmentation map annotation for PascalContext, 0 stands for background, + which is included in 60 categories. ``reduce_zero_label`` is fixed to + False. The ``img_suffix`` is fixed to '.jpg' and ``seg_map_suffix`` is + fixed to '.png'. + + Args: + split (str): Split txt file for PascalContext. 
+ """ + + CLASSES = ('aeroplane', 'bag', 'bed', 'bedclothes', 'bench', 'bicycle', + 'bird', 'boat', 'book', 'bottle', 'building', 'bus', 'cabinet', + 'car', 'cat', 'ceiling', 'chair', 'cloth', 'computer', 'cow', + 'cup', 'curtain', 'dog', 'door', 'fence', 'floor', 'flower', + 'food', 'grass', 'ground', 'horse', 'keyboard', 'light', + 'motorbike', 'mountain', 'mouse', 'person', 'plate', 'platform', + 'pottedplant', 'road', 'rock', 'sheep', 'shelves', 'sidewalk', + 'sign', 'sky', 'snow', 'sofa', 'table', 'track', 'train', + 'tree', 'truck', 'tvmonitor', 'wall', 'water', 'window', 'wood') + + PALETTE = [[180, 120, 120], [6, 230, 230], [80, 50, 50], [4, 200, 3], + [120, 120, 80], [140, 140, 140], [204, 5, 255], [230, 230, 230], + [4, 250, 7], [224, 5, 255], [235, 255, 7], [150, 5, 61], + [120, 120, 70], [8, 255, 51], [255, 6, 82], [143, 255, 140], + [204, 255, 4], [255, 51, 7], [204, 70, 3], [0, 102, 200], + [61, 230, 250], [255, 6, 51], [11, 102, 255], [255, 7, 71], + [255, 9, 224], [9, 7, 230], [220, 220, 220], [255, 9, 92], + [112, 9, 255], [8, 255, 214], [7, 255, 224], [255, 184, 6], + [10, 255, 71], [255, 41, 10], [7, 255, 255], [224, 255, 8], + [102, 8, 255], [255, 61, 6], [255, 194, 7], [255, 122, 8], + [0, 255, 20], [255, 8, 41], [255, 5, 153], [6, 51, 255], + [235, 12, 255], [160, 150, 20], [0, 163, 255], [140, 140, 140], + [250, 10, 15], [20, 255, 0], [31, 255, 0], [255, 31, 0], + [255, 224, 0], [153, 255, 0], [0, 0, 255], [255, 71, 0], + [0, 235, 255], [0, 173, 255], [31, 0, 255]] + + def __init__(self, split, **kwargs): + super(PascalContextDataset59, self).__init__( + img_suffix='.jpg', + seg_map_suffix='.png', + split=split, + reduce_zero_label=True, + **kwargs) + assert self.file_client.exists(self.img_dir) and self.split is not None diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/pipelines/__init__.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/pipelines/__init__.py new file mode 100644 index 0000000..8256a6f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/pipelines/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .compose import Compose +from .formatting import (Collect, ImageToTensor, ToDataContainer, ToTensor, + Transpose, to_tensor) +from .loading import LoadAnnotations, LoadImageFromFile +from .test_time_aug import MultiScaleFlipAug +from .transforms import (CLAHE, AdjustGamma, Normalize, Pad, + PhotoMetricDistortion, RandomCrop, RandomCutOut, + RandomFlip, RandomMosaic, RandomRotate, Rerange, + Resize, RGB2Gray, SegRescale) + +__all__ = [ + 'Compose', 'to_tensor', 'ToTensor', 'ImageToTensor', 'ToDataContainer', + 'Transpose', 'Collect', 'LoadAnnotations', 'LoadImageFromFile', + 'MultiScaleFlipAug', 'Resize', 'RandomFlip', 'Pad', 'RandomCrop', + 'Normalize', 'SegRescale', 'PhotoMetricDistortion', 'RandomRotate', + 'AdjustGamma', 'CLAHE', 'Rerange', 'RGB2Gray', 'RandomCutOut', + 'RandomMosaic' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/pipelines/compose.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/pipelines/compose.py new file mode 100644 index 0000000..30280c1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/pipelines/compose.py @@ -0,0 +1,52 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import collections + +from mmcv.utils import build_from_cfg + +from ..builder import PIPELINES + + +@PIPELINES.register_module() +class Compose(object): + """Compose multiple transforms sequentially. + + Args: + transforms (Sequence[dict | callable]): Sequence of transform object or + config dict to be composed. + """ + + def __init__(self, transforms): + assert isinstance(transforms, collections.abc.Sequence) + self.transforms = [] + for transform in transforms: + if isinstance(transform, dict): + transform = build_from_cfg(transform, PIPELINES) + self.transforms.append(transform) + elif callable(transform): + self.transforms.append(transform) + else: + raise TypeError('transform must be callable or a dict') + + def __call__(self, data): + """Call function to apply transforms sequentially. + + Args: + data (dict): A result dict contains the data to transform. + + Returns: + dict: Transformed data. + """ + + for t in self.transforms: + data = t(data) + if data is None: + return None + return data + + def __repr__(self): + format_string = self.__class__.__name__ + '(' + for t in self.transforms: + format_string += '\n' + format_string += f' {t}' + format_string += '\n)' + return format_string diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/pipelines/formating.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/pipelines/formating.py new file mode 100644 index 0000000..f6e53bf --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/pipelines/formating.py @@ -0,0 +1,9 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# flake8: noqa +import warnings + +from .formatting import * + +warnings.warn('DeprecationWarning: mmseg.datasets.pipelines.formating will be ' + 'deprecated in 2021, please replace it with ' + 'mmseg.datasets.pipelines.formatting.') diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/pipelines/formatting.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/pipelines/formatting.py new file mode 100644 index 0000000..4e057c1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/pipelines/formatting.py @@ -0,0 +1,289 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from collections.abc import Sequence + +import mmcv +import numpy as np +import torch +from mmcv.parallel import DataContainer as DC + +from ..builder import PIPELINES + + +def to_tensor(data): + """Convert objects of various python types to :obj:`torch.Tensor`. + + Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`, + :class:`Sequence`, :class:`int` and :class:`float`. + + Args: + data (torch.Tensor | numpy.ndarray | Sequence | int | float): Data to + be converted. + """ + + if isinstance(data, torch.Tensor): + return data + elif isinstance(data, np.ndarray): + return torch.from_numpy(data) + elif isinstance(data, Sequence) and not mmcv.is_str(data): + return torch.tensor(data) + elif isinstance(data, int): + return torch.LongTensor([data]) + elif isinstance(data, float): + return torch.FloatTensor([data]) + else: + raise TypeError(f'type {type(data)} cannot be converted to tensor.') + + +@PIPELINES.register_module() +class ToTensor(object): + """Convert some results to :obj:`torch.Tensor` by given keys. + + Args: + keys (Sequence[str]): Keys that need to be converted to Tensor. + """ + + def __init__(self, keys): + self.keys = keys + + def __call__(self, results): + """Call function to convert data in results to :obj:`torch.Tensor`. 
+ + Args: + results (dict): Result dict contains the data to convert. + + Returns: + dict: The result dict contains the data converted + to :obj:`torch.Tensor`. + """ + + for key in self.keys: + results[key] = to_tensor(results[key]) + return results + + def __repr__(self): + return self.__class__.__name__ + f'(keys={self.keys})' + + +@PIPELINES.register_module() +class ImageToTensor(object): + """Convert image to :obj:`torch.Tensor` by given keys. + + The dimension order of input image is (H, W, C). The pipeline will convert + it to (C, H, W). If only 2 dimension (H, W) is given, the output would be + (1, H, W). + + Args: + keys (Sequence[str]): Key of images to be converted to Tensor. + """ + + def __init__(self, keys): + self.keys = keys + + def __call__(self, results): + """Call function to convert image in results to :obj:`torch.Tensor` and + transpose the channel order. + + Args: + results (dict): Result dict contains the image data to convert. + + Returns: + dict: The result dict contains the image converted + to :obj:`torch.Tensor` and transposed to (C, H, W) order. + """ + + for key in self.keys: + img = results[key] + if len(img.shape) < 3: + img = np.expand_dims(img, -1) + results[key] = to_tensor(img.transpose(2, 0, 1)) + return results + + def __repr__(self): + return self.__class__.__name__ + f'(keys={self.keys})' + + +@PIPELINES.register_module() +class Transpose(object): + """Transpose some results by given keys. + + Args: + keys (Sequence[str]): Keys of results to be transposed. + order (Sequence[int]): Order of transpose. + """ + + def __init__(self, keys, order): + self.keys = keys + self.order = order + + def __call__(self, results): + """Call function to convert image in results to :obj:`torch.Tensor` and + transpose the channel order. + + Args: + results (dict): Result dict contains the image data to convert. + + Returns: + dict: The result dict contains the image converted + to :obj:`torch.Tensor` and transposed to (C, H, W) order. + """ + + for key in self.keys: + results[key] = results[key].transpose(self.order) + return results + + def __repr__(self): + return self.__class__.__name__ + \ + f'(keys={self.keys}, order={self.order})' + + +@PIPELINES.register_module() +class ToDataContainer(object): + """Convert results to :obj:`mmcv.DataContainer` by given fields. + + Args: + fields (Sequence[dict]): Each field is a dict like + ``dict(key='xxx', **kwargs)``. The ``key`` in result will + be converted to :obj:`mmcv.DataContainer` with ``**kwargs``. + Default: ``(dict(key='img', stack=True), + dict(key='gt_semantic_seg'))``. + """ + + def __init__(self, + fields=(dict(key='img', + stack=True), dict(key='gt_semantic_seg'))): + self.fields = fields + + def __call__(self, results): + """Call function to convert data in results to + :obj:`mmcv.DataContainer`. + + Args: + results (dict): Result dict contains the data to convert. + + Returns: + dict: The result dict contains the data converted to + :obj:`mmcv.DataContainer`. + """ + + for field in self.fields: + field = field.copy() + key = field.pop('key') + results[key] = DC(results[key], **field) + return results + + def __repr__(self): + return self.__class__.__name__ + f'(fields={self.fields})' + + +@PIPELINES.register_module() +class DefaultFormatBundle(object): + """Default formatting bundle. + + It simplifies the pipeline of formatting common fields, including "img" + and "gt_semantic_seg". These fields are formatted as follows. 
+ + - img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True) + - gt_semantic_seg: (1)unsqueeze dim-0 (2)to tensor, + (3)to DataContainer (stack=True) + """ + + def __call__(self, results): + """Call function to transform and format common fields in results. + + Args: + results (dict): Result dict contains the data to convert. + + Returns: + dict: The result dict contains the data that is formatted with + default bundle. + """ + + if 'img' in results: + img = results['img'] + if len(img.shape) < 3: + img = np.expand_dims(img, -1) + img = np.ascontiguousarray(img.transpose(2, 0, 1)) + results['img'] = DC(to_tensor(img), stack=True) + if 'gt_semantic_seg' in results: + # convert to long + results['gt_semantic_seg'] = DC( + to_tensor(results['gt_semantic_seg'][None, + ...].astype(np.int64)), + stack=True) + return results + + def __repr__(self): + return self.__class__.__name__ + + +@PIPELINES.register_module() +class Collect(object): + """Collect data from the loader relevant to the specific task. + + This is usually the last stage of the data loader pipeline. Typically keys + is set to some subset of "img", "gt_semantic_seg". + + The "img_meta" item is always populated. The contents of the "img_meta" + dictionary depends on "meta_keys". By default this includes: + + - "img_shape": shape of the image input to the network as a tuple + (h, w, c). Note that images may be zero padded on the bottom/right + if the batch tensor is larger than this shape. + + - "scale_factor": a float indicating the preprocessing scale + + - "flip": a boolean indicating if image flip transform was used + + - "filename": path to the image file + + - "ori_shape": original shape of the image as a tuple (h, w, c) + + - "pad_shape": image shape after padding + + - "img_norm_cfg": a dict of normalization information: + - mean - per channel mean subtraction + - std - per channel std divisor + - to_rgb - bool indicating if bgr was converted to rgb + + Args: + keys (Sequence[str]): Keys of results to be collected in ``data``. + meta_keys (Sequence[str], optional): Meta keys to be converted to + ``mmcv.DataContainer`` and collected in ``data[img_metas]``. + Default: (``filename``, ``ori_filename``, ``ori_shape``, + ``img_shape``, ``pad_shape``, ``scale_factor``, ``flip``, + ``flip_direction``, ``img_norm_cfg``) + """ + + def __init__(self, + keys, + meta_keys=('filename', 'ori_filename', 'ori_shape', + 'img_shape', 'pad_shape', 'scale_factor', 'flip', + 'flip_direction', 'img_norm_cfg')): + self.keys = keys + self.meta_keys = meta_keys + + def __call__(self, results): + """Call function to collect keys in results. The keys in ``meta_keys`` + will be converted to :obj:mmcv.DataContainer. + + Args: + results (dict): Result dict contains the data to collect. 
+ + Returns: + dict: The result dict contains the following keys + - keys in``self.keys`` + - ``img_metas`` + """ + + data = {} + img_meta = {} + for key in self.meta_keys: + img_meta[key] = results[key] + data['img_metas'] = DC(img_meta, cpu_only=True) + for key in self.keys: + data[key] = results[key] + return data + + def __repr__(self): + return self.__class__.__name__ + \ + f'(keys={self.keys}, meta_keys={self.meta_keys})' diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/pipelines/loading.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/pipelines/loading.py new file mode 100644 index 0000000..572e434 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/pipelines/loading.py @@ -0,0 +1,158 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp + +import mmcv +import numpy as np + +from ..builder import PIPELINES + + +@PIPELINES.register_module() +class LoadImageFromFile(object): + """Load an image from file. + + Required keys are "img_prefix" and "img_info" (a dict that must contain the + key "filename"). Added or updated keys are "filename", "img", "img_shape", + "ori_shape" (same as `img_shape`), "pad_shape" (same as `img_shape`), + "scale_factor" (1.0) and "img_norm_cfg" (means=0 and stds=1). + + Args: + to_float32 (bool): Whether to convert the loaded image to a float32 + numpy array. If set to False, the loaded image is an uint8 array. + Defaults to False. + color_type (str): The flag argument for :func:`mmcv.imfrombytes`. + Defaults to 'color'. + file_client_args (dict): Arguments to instantiate a FileClient. + See :class:`mmcv.fileio.FileClient` for details. + Defaults to ``dict(backend='disk')``. + imdecode_backend (str): Backend for :func:`mmcv.imdecode`. Default: + 'cv2' + """ + + def __init__(self, + to_float32=False, + color_type='color', + file_client_args=dict(backend='disk'), + imdecode_backend='cv2'): + self.to_float32 = to_float32 + self.color_type = color_type + self.file_client_args = file_client_args.copy() + self.file_client = None + self.imdecode_backend = imdecode_backend + + def __call__(self, results): + """Call functions to load image and get image meta information. + + Args: + results (dict): Result dict from :obj:`mmseg.CustomDataset`. + + Returns: + dict: The dict contains loaded image and meta information. 
+ """ + + if self.file_client is None: + self.file_client = mmcv.FileClient(**self.file_client_args) + + if results.get('img_prefix') is not None: + filename = osp.join(results['img_prefix'], + results['img_info']['filename']) + else: + filename = results['img_info']['filename'] + img_bytes = self.file_client.get(filename) + img = mmcv.imfrombytes( + img_bytes, flag=self.color_type, backend=self.imdecode_backend) + if self.to_float32: + img = img.astype(np.float32) + + results['filename'] = filename + results['ori_filename'] = results['img_info']['filename'] + results['img'] = img + results['img_shape'] = img.shape + results['ori_shape'] = img.shape + # Set initial values for default meta_keys + results['pad_shape'] = img.shape + results['scale_factor'] = 1.0 + num_channels = 1 if len(img.shape) < 3 else img.shape[2] + results['img_norm_cfg'] = dict( + mean=np.zeros(num_channels, dtype=np.float32), + std=np.ones(num_channels, dtype=np.float32), + to_rgb=False) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(to_float32={self.to_float32},' + repr_str += f"color_type='{self.color_type}'," + repr_str += f"imdecode_backend='{self.imdecode_backend}')" + return repr_str + + +@PIPELINES.register_module() +class LoadAnnotations(object): + """Load annotations for semantic segmentation. + + Args: + reduce_zero_label (bool): Whether reduce all label value by 1. + Usually used for datasets where 0 is background label. + Default: False. + file_client_args (dict): Arguments to instantiate a FileClient. + See :class:`mmcv.fileio.FileClient` for details. + Defaults to ``dict(backend='disk')``. + imdecode_backend (str): Backend for :func:`mmcv.imdecode`. Default: + 'pillow' + """ + + def __init__(self, + reduce_zero_label=False, + file_client_args=dict(backend='disk'), + imdecode_backend='pillow'): + self.reduce_zero_label = reduce_zero_label + self.file_client_args = file_client_args.copy() + self.file_client = None + self.imdecode_backend = imdecode_backend + + def __call__(self, results): + """Call function to load multiple types annotations. + + Args: + results (dict): Result dict from :obj:`mmseg.CustomDataset`. + + Returns: + dict: The dict contains loaded semantic segmentation annotations. 
+ """ + + if self.file_client is None: + self.file_client = mmcv.FileClient(**self.file_client_args) + + if results.get('seg_prefix', None) is not None: + filename = osp.join(results['seg_prefix'], + results['ann_info']['seg_map']) + else: + filename = results['ann_info']['seg_map'] + img_bytes = self.file_client.get(filename) + gt_semantic_seg = mmcv.imfrombytes( + img_bytes, flag='unchanged', + backend=self.imdecode_backend).squeeze().astype(np.uint8) + # modify if custom classes + if results.get('label_map', None) is not None: + # Add deep copy to solve bug of repeatedly + # replace `gt_semantic_seg`, which is reported in + # https://github.com/open-mmlab/mmsegmentation/pull/1445/ + gt_semantic_seg_copy = gt_semantic_seg.copy() + for old_id, new_id in results['label_map'].items(): + gt_semantic_seg[gt_semantic_seg_copy == old_id] = new_id + # reduce zero_label + if self.reduce_zero_label: + # avoid using underflow conversion + gt_semantic_seg[gt_semantic_seg == 0] = 255 + gt_semantic_seg = gt_semantic_seg - 1 + gt_semantic_seg[gt_semantic_seg == 254] = 255 + results['gt_semantic_seg'] = gt_semantic_seg + results['seg_fields'].append('gt_semantic_seg') + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(reduce_zero_label={self.reduce_zero_label},' + repr_str += f"imdecode_backend='{self.imdecode_backend}')" + return repr_str diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/pipelines/test_time_aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/pipelines/test_time_aug.py new file mode 100644 index 0000000..4964087 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/pipelines/test_time_aug.py @@ -0,0 +1,142 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings + +import mmcv + +from ..builder import PIPELINES +from .compose import Compose + + +@PIPELINES.register_module() +class MultiScaleFlipAug(object): + """Test-time augmentation with multiple scales and flipping. + + An example configuration is as followed: + + .. code-block:: + + img_scale=(2048, 1024), + img_ratios=[0.5, 1.0], + flip=True, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ] + + After MultiScaleFLipAug with above configuration, the results are wrapped + into lists of the same length as followed: + + .. code-block:: + + dict( + img=[...], + img_shape=[...], + scale=[(1024, 512), (1024, 512), (2048, 1024), (2048, 1024)] + flip=[False, True, False, True] + ... + ) + + Args: + transforms (list[dict]): Transforms to apply in each augmentation. + img_scale (None | tuple | list[tuple]): Images scales for resizing. + img_ratios (float | list[float]): Image ratios for resizing + flip (bool): Whether apply flip augmentation. Default: False. + flip_direction (str | list[str]): Flip augmentation directions, + options are "horizontal" and "vertical". If flip_direction is list, + multiple flip augmentations will be applied. + It has no effect when flip == False. Default: "horizontal". 
+ """ + + def __init__(self, + transforms, + img_scale, + img_ratios=None, + flip=False, + flip_direction='horizontal'): + if flip: + trans_index = { + key['type']: index + for index, key in enumerate(transforms) + } + if 'RandomFlip' in trans_index and 'Pad' in trans_index: + assert trans_index['RandomFlip'] < trans_index['Pad'], \ + 'Pad must be executed after RandomFlip when flip is True' + self.transforms = Compose(transforms) + if img_ratios is not None: + img_ratios = img_ratios if isinstance(img_ratios, + list) else [img_ratios] + assert mmcv.is_list_of(img_ratios, float) + if img_scale is None: + # mode 1: given img_scale=None and a range of image ratio + self.img_scale = None + assert mmcv.is_list_of(img_ratios, float) + elif isinstance(img_scale, tuple) and mmcv.is_list_of( + img_ratios, float): + assert len(img_scale) == 2 + # mode 2: given a scale and a range of image ratio + self.img_scale = [(int(img_scale[0] * ratio), + int(img_scale[1] * ratio)) + for ratio in img_ratios] + else: + # mode 3: given multiple scales + self.img_scale = img_scale if isinstance(img_scale, + list) else [img_scale] + assert mmcv.is_list_of(self.img_scale, tuple) or self.img_scale is None + self.flip = flip + self.img_ratios = img_ratios + self.flip_direction = flip_direction if isinstance( + flip_direction, list) else [flip_direction] + assert mmcv.is_list_of(self.flip_direction, str) + if not self.flip and self.flip_direction != ['horizontal']: + warnings.warn( + 'flip_direction has no effect when flip is set to False') + if (self.flip + and not any([t['type'] == 'RandomFlip' for t in transforms])): + warnings.warn( + 'flip has no effect when RandomFlip is not in transforms') + + def __call__(self, results): + """Call function to apply test time augment transforms on results. + + Args: + results (dict): Result dict contains the data to transform. + + Returns: + dict[str: list]: The augmented data, where each value is wrapped + into a list. + """ + + aug_data = [] + if self.img_scale is None and mmcv.is_list_of(self.img_ratios, float): + h, w = results['img'].shape[:2] + img_scale = [(int(w * ratio), int(h * ratio)) + for ratio in self.img_ratios] + else: + img_scale = self.img_scale + flip_aug = [False, True] if self.flip else [False] + for scale in img_scale: + for flip in flip_aug: + for direction in self.flip_direction: + _results = results.copy() + _results['scale'] = scale + _results['flip'] = flip + _results['flip_direction'] = direction + data = self.transforms(_results) + aug_data.append(data) + # list of dict to dict of list + aug_data_dict = {key: [] for key in aug_data[0]} + for data in aug_data: + for key, val in data.items(): + aug_data_dict[key].append(val) + return aug_data_dict + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(transforms={self.transforms}, ' + repr_str += f'img_scale={self.img_scale}, flip={self.flip})' + repr_str += f'flip_direction={self.flip_direction}' + return repr_str diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/pipelines/transforms.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/pipelines/transforms.py new file mode 100644 index 0000000..5673b64 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/pipelines/transforms.py @@ -0,0 +1,1335 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import copy + +import mmcv +import numpy as np +from mmcv.utils import deprecated_api_warning, is_tuple_of +from numpy import random + +from ..builder import PIPELINES + + +@PIPELINES.register_module() +class ResizeToMultiple(object): + """Resize images & seg to multiple of divisor. + + Args: + size_divisor (int): images and gt seg maps need to resize to multiple + of size_divisor. Default: 32. + interpolation (str, optional): The interpolation mode of image resize. + Default: None + """ + + def __init__(self, size_divisor=32, interpolation=None): + self.size_divisor = size_divisor + self.interpolation = interpolation + + def __call__(self, results): + """Call function to resize images, semantic segmentation map to + multiple of size divisor. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Resized results, 'img_shape', 'pad_shape' keys are updated. + """ + # Align image to multiple of size divisor. + img = results['img'] + img = mmcv.imresize_to_multiple( + img, + self.size_divisor, + scale_factor=1, + interpolation=self.interpolation + if self.interpolation else 'bilinear') + + results['img'] = img + results['img_shape'] = img.shape + results['pad_shape'] = img.shape + + # Align segmentation map to multiple of size divisor. + for key in results.get('seg_fields', []): + gt_seg = results[key] + gt_seg = mmcv.imresize_to_multiple( + gt_seg, + self.size_divisor, + scale_factor=1, + interpolation='nearest') + results[key] = gt_seg + + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += (f'(size_divisor={self.size_divisor}, ' + f'interpolation={self.interpolation})') + return repr_str + + +@PIPELINES.register_module() +class Resize(object): + """Resize images & seg. + + This transform resizes the input image to some scale. If the input dict + contains the key "scale", then the scale in the input dict is used, + otherwise the specified scale in the init method is used. + + ``img_scale`` can be None, a tuple (single-scale) or a list of tuple + (multi-scale). There are 4 multiscale modes: + + - ``ratio_range is not None``: + 1. When img_scale is None, img_scale is the shape of image in results + (img_scale = results['img'].shape[:2]) and the image is resized based + on the original size. (mode 1) + 2. When img_scale is a tuple (single-scale), randomly sample a ratio from + the ratio range and multiply it with the image scale. (mode 2) + + - ``ratio_range is None and multiscale_mode == "range"``: randomly sample a + scale from a range. (mode 3) + + - ``ratio_range is None and multiscale_mode == "value"``: randomly sample a + scale from multiple scales. (mode 4) + + Args: + img_scale (tuple or list[tuple]): Images scales for resizing. + Default: None. + multiscale_mode (str): Either "range" or "value". + Default: 'range' + ratio_range (tuple[float]): (min_ratio, max_ratio). + Default: None + keep_ratio (bool): Whether to keep the aspect ratio when resizing the + image. Default: True + min_size (int, optional): The minimum size for input and the shape + of the image and seg map will not be less than ``min_size``. + As the shape of model input is fixed like 'SETR' and 'BEiT'. + Following the setting in these models, resized images must be + bigger than the crop size in ``slide_inference``.
Default: None + """ + + def __init__(self, + img_scale=None, + multiscale_mode='range', + ratio_range=None, + keep_ratio=True, + min_size=None): + if img_scale is None: + self.img_scale = None + else: + if isinstance(img_scale, list): + self.img_scale = img_scale + else: + self.img_scale = [img_scale] + assert mmcv.is_list_of(self.img_scale, tuple) + + if ratio_range is not None: + # mode 1: given img_scale=None and a range of image ratio + # mode 2: given a scale and a range of image ratio + assert self.img_scale is None or len(self.img_scale) == 1 + else: + # mode 3 and 4: given multiple scales or a range of scales + assert multiscale_mode in ['value', 'range'] + + self.multiscale_mode = multiscale_mode + self.ratio_range = ratio_range + self.keep_ratio = keep_ratio + self.min_size = min_size + + @staticmethod + def random_select(img_scales): + """Randomly select an img_scale from given candidates. + + Args: + img_scales (list[tuple]): Images scales for selection. + + Returns: + (tuple, int): Returns a tuple ``(img_scale, scale_idx)``, + where ``img_scale`` is the selected image scale and + ``scale_idx`` is the selected index in the given candidates. + """ + + assert mmcv.is_list_of(img_scales, tuple) + scale_idx = np.random.randint(len(img_scales)) + img_scale = img_scales[scale_idx] + return img_scale, scale_idx + + @staticmethod + def random_sample(img_scales): + """Randomly sample an img_scale when ``multiscale_mode=='range'``. + + Args: + img_scales (list[tuple]): Images scale range for sampling. + There must be two tuples in img_scales, which specify the lower + and upper bound of image scales. + + Returns: + (tuple, None): Returns a tuple ``(img_scale, None)``, where + ``img_scale`` is sampled scale and None is just a placeholder + to be consistent with :func:`random_select`. + """ + + assert mmcv.is_list_of(img_scales, tuple) and len(img_scales) == 2 + img_scale_long = [max(s) for s in img_scales] + img_scale_short = [min(s) for s in img_scales] + long_edge = np.random.randint( + min(img_scale_long), + max(img_scale_long) + 1) + short_edge = np.random.randint( + min(img_scale_short), + max(img_scale_short) + 1) + img_scale = (long_edge, short_edge) + return img_scale, None + + @staticmethod + def random_sample_ratio(img_scale, ratio_range): + """Randomly sample an img_scale when ``ratio_range`` is specified. + + A ratio will be randomly sampled from the range specified by + ``ratio_range``. Then it would be multiplied with ``img_scale`` to + generate sampled scale. + + Args: + img_scale (tuple): Images scale base to multiply with ratio. + ratio_range (tuple[float]): The minimum and maximum ratio to scale + the ``img_scale``. + + Returns: + (tuple, None): Returns a tuple ``(scale, None)``, where + ``scale`` is sampled ratio multiplied with ``img_scale`` and + None is just a placeholder to be consistent with + :func:`random_select`. + """ + + assert isinstance(img_scale, tuple) and len(img_scale) == 2 + min_ratio, max_ratio = ratio_range + assert min_ratio <= max_ratio + ratio = np.random.random_sample() * (max_ratio - min_ratio) + min_ratio + scale = int(img_scale[0] * ratio), int(img_scale[1] * ratio) + return scale, None + + def _random_scale(self, results): + """Randomly sample an img_scale according to ``ratio_range`` and + ``multiscale_mode``. + + If ``ratio_range`` is specified, a ratio will be sampled and be + multiplied with ``img_scale``. + If multiple scales are specified by ``img_scale``, a scale will be + sampled according to ``multiscale_mode``.
+ Otherwise, single scale will be used. + + Args: + results (dict): Result dict from :obj:`dataset`. + + Returns: + dict: Two new keys 'scale` and 'scale_idx` are added into + ``results``, which would be used by subsequent pipelines. + """ + + if self.ratio_range is not None: + if self.img_scale is None: + h, w = results['img'].shape[:2] + scale, scale_idx = self.random_sample_ratio((w, h), + self.ratio_range) + else: + scale, scale_idx = self.random_sample_ratio( + self.img_scale[0], self.ratio_range) + elif len(self.img_scale) == 1: + scale, scale_idx = self.img_scale[0], 0 + elif self.multiscale_mode == 'range': + scale, scale_idx = self.random_sample(self.img_scale) + elif self.multiscale_mode == 'value': + scale, scale_idx = self.random_select(self.img_scale) + else: + raise NotImplementedError + + results['scale'] = scale + results['scale_idx'] = scale_idx + + def _resize_img(self, results): + """Resize images with ``results['scale']``.""" + if self.keep_ratio: + if self.min_size is not None: + # TODO: Now 'min_size' is an 'int' which means the minimum + # shape of images is (min_size, min_size, 3). 'min_size' + # with tuple type will be supported, i.e. the width and + # height are not equal. + if min(results['scale']) < self.min_size: + new_short = self.min_size + else: + new_short = min(results['scale']) + + h, w = results['img'].shape[:2] + if h > w: + new_h, new_w = new_short * h / w, new_short + else: + new_h, new_w = new_short, new_short * w / h + results['scale'] = (new_h, new_w) + + img, scale_factor = mmcv.imrescale( + results['img'], results['scale'], return_scale=True) + # the w_scale and h_scale has minor difference + # a real fix should be done in the mmcv.imrescale in the future + new_h, new_w = img.shape[:2] + h, w = results['img'].shape[:2] + w_scale = new_w / w + h_scale = new_h / h + else: + img, w_scale, h_scale = mmcv.imresize( + results['img'], results['scale'], return_scale=True) + scale_factor = np.array([w_scale, h_scale, w_scale, h_scale], + dtype=np.float32) + results['img'] = img + results['img_shape'] = img.shape + results['pad_shape'] = img.shape # in case that there is no padding + results['scale_factor'] = scale_factor + results['keep_ratio'] = self.keep_ratio + + def _resize_seg(self, results): + """Resize semantic segmentation map with ``results['scale']``.""" + for key in results.get('seg_fields', []): + if self.keep_ratio: + gt_seg = mmcv.imrescale( + results[key], results['scale'], interpolation='nearest') + else: + gt_seg = mmcv.imresize( + results[key], results['scale'], interpolation='nearest') + results[key] = gt_seg + + def __call__(self, results): + """Call function to resize images, bounding boxes, masks, semantic + segmentation map. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Resized results, 'img_shape', 'pad_shape', 'scale_factor', + 'keep_ratio' keys are added into result dict. + """ + + if 'scale' not in results: + self._random_scale(results) + self._resize_img(results) + self._resize_seg(results) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += (f'(img_scale={self.img_scale}, ' + f'multiscale_mode={self.multiscale_mode}, ' + f'ratio_range={self.ratio_range}, ' + f'keep_ratio={self.keep_ratio})') + return repr_str + + +@PIPELINES.register_module() +class RandomFlip(object): + """Flip the image & seg. + + If the input dict contains the key "flip", then the flag will be used, + otherwise it will be randomly decided by a ratio specified in the init + method. 
+ + Args: + prob (float, optional): The flipping probability. Default: None. + direction(str, optional): The flipping direction. Options are + 'horizontal' and 'vertical'. Default: 'horizontal'. + """ + + @deprecated_api_warning({'flip_ratio': 'prob'}, cls_name='RandomFlip') + def __init__(self, prob=None, direction='horizontal'): + self.prob = prob + self.direction = direction + if prob is not None: + assert prob >= 0 and prob <= 1 + assert direction in ['horizontal', 'vertical'] + + def __call__(self, results): + """Call function to flip bounding boxes, masks, semantic segmentation + maps. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Flipped results, 'flip', 'flip_direction' keys are added into + result dict. + """ + + if 'flip' not in results: + flip = True if np.random.rand() < self.prob else False + results['flip'] = flip + if 'flip_direction' not in results: + results['flip_direction'] = self.direction + if results['flip']: + # flip image + results['img'] = mmcv.imflip( + results['img'], direction=results['flip_direction']) + + # flip segs + for key in results.get('seg_fields', []): + # use copy() to make numpy stride positive + results[key] = mmcv.imflip( + results[key], direction=results['flip_direction']).copy() + return results + + def __repr__(self): + return self.__class__.__name__ + f'(prob={self.prob})' + + +@PIPELINES.register_module() +class Pad(object): + """Pad the image & mask. + + There are two padding modes: (1) pad to a fixed size and (2) pad to the + minimum size that is divisible by some number. + Added keys are "pad_shape", "pad_fixed_size", "pad_size_divisor", + + Args: + size (tuple, optional): Fixed padding size. + size_divisor (int, optional): The divisor of padded size. + pad_val (float, optional): Padding value. Default: 0. + seg_pad_val (float, optional): Padding value of segmentation map. + Default: 255. + """ + + def __init__(self, + size=None, + size_divisor=None, + pad_val=0, + seg_pad_val=255): + self.size = size + self.size_divisor = size_divisor + self.pad_val = pad_val + self.seg_pad_val = seg_pad_val + # only one of size and size_divisor should be valid + assert size is not None or size_divisor is not None + assert size is None or size_divisor is None + + def _pad_img(self, results): + """Pad images according to ``self.size``.""" + if self.size is not None: + padded_img = mmcv.impad( + results['img'], shape=self.size, pad_val=self.pad_val) + elif self.size_divisor is not None: + padded_img = mmcv.impad_to_multiple( + results['img'], self.size_divisor, pad_val=self.pad_val) + results['img'] = padded_img + results['pad_shape'] = padded_img.shape + results['pad_fixed_size'] = self.size + results['pad_size_divisor'] = self.size_divisor + + def _pad_seg(self, results): + """Pad masks according to ``results['pad_shape']``.""" + for key in results.get('seg_fields', []): + results[key] = mmcv.impad( + results[key], + shape=results['pad_shape'][:2], + pad_val=self.seg_pad_val) + + def __call__(self, results): + """Call function to pad images, masks, semantic segmentation maps. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Updated result dict. + """ + + self._pad_img(results) + self._pad_seg(results) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(size={self.size}, size_divisor={self.size_divisor}, ' \ + f'pad_val={self.pad_val})' + return repr_str + + +@PIPELINES.register_module() +class Normalize(object): + """Normalize the image. 
+ + Added key is "img_norm_cfg". + + Args: + mean (sequence): Mean values of 3 channels. + std (sequence): Std values of 3 channels. + to_rgb (bool): Whether to convert the image from BGR to RGB, + default is true. + """ + + def __init__(self, mean, std, to_rgb=True): + self.mean = np.array(mean, dtype=np.float32) + self.std = np.array(std, dtype=np.float32) + self.to_rgb = to_rgb + + def __call__(self, results): + """Call function to normalize images. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Normalized results, 'img_norm_cfg' key is added into + result dict. + """ + + results['img'] = mmcv.imnormalize(results['img'], self.mean, self.std, + self.to_rgb) + results['img_norm_cfg'] = dict( + mean=self.mean, std=self.std, to_rgb=self.to_rgb) + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(mean={self.mean}, std={self.std}, to_rgb=' \ + f'{self.to_rgb})' + return repr_str + + +@PIPELINES.register_module() +class Rerange(object): + """Rerange the image pixel value. + + Args: + min_value (float or int): Minimum value of the reranged image. + Default: 0. + max_value (float or int): Maximum value of the reranged image. + Default: 255. + """ + + def __init__(self, min_value=0, max_value=255): + assert isinstance(min_value, float) or isinstance(min_value, int) + assert isinstance(max_value, float) or isinstance(max_value, int) + assert min_value < max_value + self.min_value = min_value + self.max_value = max_value + + def __call__(self, results): + """Call function to rerange images. + + Args: + results (dict): Result dict from loading pipeline. + Returns: + dict: Reranged results. + """ + + img = results['img'] + img_min_value = np.min(img) + img_max_value = np.max(img) + + assert img_min_value < img_max_value + # rerange to [0, 1] + img = (img - img_min_value) / (img_max_value - img_min_value) + # rerange to [min_value, max_value] + img = img * (self.max_value - self.min_value) + self.min_value + results['img'] = img + + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(min_value={self.min_value}, max_value={self.max_value})' + return repr_str + + +@PIPELINES.register_module() +class CLAHE(object): + """Use CLAHE method to process the image. + + See `ZUIDERVELD,K. Contrast Limited Adaptive Histogram Equalization[J]. + Graphics Gems, 1994:474-485.` for more information. + + Args: + clip_limit (float): Threshold for contrast limiting. Default: 40.0. + tile_grid_size (tuple[int]): Size of grid for histogram equalization. + Input image will be divided into equally sized rectangular tiles. + It defines the number of tiles in row and column. Default: (8, 8). + """ + + def __init__(self, clip_limit=40.0, tile_grid_size=(8, 8)): + assert isinstance(clip_limit, (float, int)) + self.clip_limit = clip_limit + assert is_tuple_of(tile_grid_size, int) + assert len(tile_grid_size) == 2 + self.tile_grid_size = tile_grid_size + + def __call__(self, results): + """Call function to Use CLAHE method process images. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Processed results. 
+ """ + + for i in range(results['img'].shape[2]): + results['img'][:, :, i] = mmcv.clahe( + np.array(results['img'][:, :, i], dtype=np.uint8), + self.clip_limit, self.tile_grid_size) + + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(clip_limit={self.clip_limit}, '\ + f'tile_grid_size={self.tile_grid_size})' + return repr_str + + +@PIPELINES.register_module() +class RandomCrop(object): + """Random crop the image & seg. + + Args: + crop_size (tuple): Expected size after cropping, (h, w). + cat_max_ratio (float): The maximum ratio that single category could + occupy. + """ + + def __init__(self, crop_size, cat_max_ratio=1., ignore_index=255): + assert crop_size[0] > 0 and crop_size[1] > 0 + self.crop_size = crop_size + self.cat_max_ratio = cat_max_ratio + self.ignore_index = ignore_index + + def get_crop_bbox(self, img): + """Randomly get a crop bounding box.""" + margin_h = max(img.shape[0] - self.crop_size[0], 0) + margin_w = max(img.shape[1] - self.crop_size[1], 0) + offset_h = np.random.randint(0, margin_h + 1) + offset_w = np.random.randint(0, margin_w + 1) + crop_y1, crop_y2 = offset_h, offset_h + self.crop_size[0] + crop_x1, crop_x2 = offset_w, offset_w + self.crop_size[1] + + return crop_y1, crop_y2, crop_x1, crop_x2 + + def crop(self, img, crop_bbox): + """Crop from ``img``""" + crop_y1, crop_y2, crop_x1, crop_x2 = crop_bbox + img = img[crop_y1:crop_y2, crop_x1:crop_x2, ...] + return img + + def __call__(self, results): + """Call function to randomly crop images, semantic segmentation maps. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Randomly cropped results, 'img_shape' key in result dict is + updated according to crop size. + """ + + img = results['img'] + crop_bbox = self.get_crop_bbox(img) + if self.cat_max_ratio < 1.: + # Repeat 10 times + for _ in range(10): + seg_temp = self.crop(results['gt_semantic_seg'], crop_bbox) + labels, cnt = np.unique(seg_temp, return_counts=True) + cnt = cnt[labels != self.ignore_index] + if len(cnt) > 1 and np.max(cnt) / np.sum( + cnt) < self.cat_max_ratio: + break + crop_bbox = self.get_crop_bbox(img) + + # crop the image + img = self.crop(img, crop_bbox) + img_shape = img.shape + results['img'] = img + results['img_shape'] = img_shape + + # crop semantic seg + for key in results.get('seg_fields', []): + results[key] = self.crop(results[key], crop_bbox) + + return results + + def __repr__(self): + return self.__class__.__name__ + f'(crop_size={self.crop_size})' + + +@PIPELINES.register_module() +class RandomRotate(object): + """Rotate the image & seg. + + Args: + prob (float): The rotation probability. + degree (float, tuple[float]): Range of degrees to select from. If + degree is a number instead of tuple like (min, max), + the range of degree will be (``-degree``, ``+degree``) + pad_val (float, optional): Padding value of image. Default: 0. + seg_pad_val (float, optional): Padding value of segmentation map. + Default: 255. + center (tuple[float], optional): Center point (w, h) of the rotation in + the source image. If not specified, the center of the image will be + used. Default: None. + auto_bound (bool): Whether to adjust the image size to cover the whole + rotated image. 
Default: False + """ + + def __init__(self, + prob, + degree, + pad_val=0, + seg_pad_val=255, + center=None, + auto_bound=False): + self.prob = prob + assert prob >= 0 and prob <= 1 + if isinstance(degree, (float, int)): + assert degree > 0, f'degree {degree} should be positive' + self.degree = (-degree, degree) + else: + self.degree = degree + assert len(self.degree) == 2, f'degree {self.degree} should be a ' \ + f'tuple of (min, max)' + self.pad_val = pad_val + self.seg_pad_val = seg_pad_val + self.center = center + self.auto_bound = auto_bound + + def __call__(self, results): + """Call function to rotate image, semantic segmentation maps. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Rotated results. + """ + + rotate = True if np.random.rand() < self.prob else False + degree = np.random.uniform(min(*self.degree), max(*self.degree)) + if rotate: + # rotate image + results['img'] = mmcv.imrotate( + results['img'], + angle=degree, + border_value=self.pad_val, + center=self.center, + auto_bound=self.auto_bound) + + # rotate segs + for key in results.get('seg_fields', []): + results[key] = mmcv.imrotate( + results[key], + angle=degree, + border_value=self.seg_pad_val, + center=self.center, + auto_bound=self.auto_bound, + interpolation='nearest') + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(prob={self.prob}, ' \ + f'degree={self.degree}, ' \ + f'pad_val={self.pad_val}, ' \ + f'seg_pad_val={self.seg_pad_val}, ' \ + f'center={self.center}, ' \ + f'auto_bound={self.auto_bound})' + return repr_str + + +@PIPELINES.register_module() +class RGB2Gray(object): + """Convert RGB image to grayscale image. + + This transform calculates the weighted mean of input image channels with + ``weights`` and then expands the channels to ``out_channels``. When + ``out_channels`` is None, the number of output channels is the same as + input channels. + + Args: + out_channels (int): Expected number of output channels after + transforming. Default: None. + weights (tuple[float]): The weights to calculate the weighted mean. + Default: (0.299, 0.587, 0.114). + """ + + def __init__(self, out_channels=None, weights=(0.299, 0.587, 0.114)): + assert out_channels is None or out_channels > 0 + self.out_channels = out_channels + assert isinstance(weights, tuple) + for item in weights: + assert isinstance(item, (float, int)) + self.weights = weights + + def __call__(self, results): + """Call function to convert RGB image to grayscale image. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Result dict with grayscale image. + """ + img = results['img'] + assert len(img.shape) == 3 + assert img.shape[2] == len(self.weights) + weights = np.array(self.weights).reshape((1, 1, -1)) + img = (img * weights).sum(2, keepdims=True) + if self.out_channels is None: + img = img.repeat(weights.shape[2], axis=2) + else: + img = img.repeat(self.out_channels, axis=2) + + results['img'] = img + results['img_shape'] = img.shape + + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(out_channels={self.out_channels}, ' \ + f'weights={self.weights})' + return repr_str + + +@PIPELINES.register_module() +class AdjustGamma(object): + """Using gamma correction to process the image. + + Args: + gamma (float or int): Gamma value used in gamma correction. + Default: 1.0.
+ """ + + def __init__(self, gamma=1.0): + assert isinstance(gamma, float) or isinstance(gamma, int) + assert gamma > 0 + self.gamma = gamma + inv_gamma = 1.0 / gamma + self.table = np.array([(i / 255.0)**inv_gamma * 255 + for i in np.arange(256)]).astype('uint8') + + def __call__(self, results): + """Call function to process the image with gamma correction. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Processed results. + """ + + results['img'] = mmcv.lut_transform( + np.array(results['img'], dtype=np.uint8), self.table) + + return results + + def __repr__(self): + return self.__class__.__name__ + f'(gamma={self.gamma})' + + +@PIPELINES.register_module() +class SegRescale(object): + """Rescale semantic segmentation maps. + + Args: + scale_factor (float): The scale factor of the final output. + """ + + def __init__(self, scale_factor=1): + self.scale_factor = scale_factor + + def __call__(self, results): + """Call function to scale the semantic segmentation map. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Result dict with semantic segmentation map scaled. + """ + for key in results.get('seg_fields', []): + if self.scale_factor != 1: + results[key] = mmcv.imrescale( + results[key], self.scale_factor, interpolation='nearest') + return results + + def __repr__(self): + return self.__class__.__name__ + f'(scale_factor={self.scale_factor})' + + +@PIPELINES.register_module() +class PhotoMetricDistortion(object): + """Apply photometric distortion to image sequentially, every transformation + is applied with a probability of 0.5. The position of random contrast is in + second or second to last. + + 1. random brightness + 2. random contrast (mode 0) + 3. convert color from BGR to HSV + 4. random saturation + 5. random hue + 6. convert color from HSV to BGR + 7. random contrast (mode 1) + + Args: + brightness_delta (int): delta of brightness. + contrast_range (tuple): range of contrast. + saturation_range (tuple): range of saturation. + hue_delta (int): delta of hue. 
+ """ + + def __init__(self, + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18): + self.brightness_delta = brightness_delta + self.contrast_lower, self.contrast_upper = contrast_range + self.saturation_lower, self.saturation_upper = saturation_range + self.hue_delta = hue_delta + + def convert(self, img, alpha=1, beta=0): + """Multiple with alpha and add beat with clip.""" + img = img.astype(np.float32) * alpha + beta + img = np.clip(img, 0, 255) + return img.astype(np.uint8) + + def brightness(self, img): + """Brightness distortion.""" + if random.randint(2): + return self.convert( + img, + beta=random.uniform(-self.brightness_delta, + self.brightness_delta)) + return img + + def contrast(self, img): + """Contrast distortion.""" + if random.randint(2): + return self.convert( + img, + alpha=random.uniform(self.contrast_lower, self.contrast_upper)) + return img + + def saturation(self, img): + """Saturation distortion.""" + if random.randint(2): + img = mmcv.bgr2hsv(img) + img[:, :, 1] = self.convert( + img[:, :, 1], + alpha=random.uniform(self.saturation_lower, + self.saturation_upper)) + img = mmcv.hsv2bgr(img) + return img + + def hue(self, img): + """Hue distortion.""" + if random.randint(2): + img = mmcv.bgr2hsv(img) + img[:, :, + 0] = (img[:, :, 0].astype(int) + + random.randint(-self.hue_delta, self.hue_delta)) % 180 + img = mmcv.hsv2bgr(img) + return img + + def __call__(self, results): + """Call function to perform photometric distortion on images. + + Args: + results (dict): Result dict from loading pipeline. + + Returns: + dict: Result dict with images distorted. + """ + + img = results['img'] + # random brightness + img = self.brightness(img) + + # mode == 0 --> do random contrast first + # mode == 1 --> do random contrast last + mode = random.randint(2) + if mode == 1: + img = self.contrast(img) + + # random saturation + img = self.saturation(img) + + # random hue + img = self.hue(img) + + # random contrast + if mode == 0: + img = self.contrast(img) + + results['img'] = img + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += (f'(brightness_delta={self.brightness_delta}, ' + f'contrast_range=({self.contrast_lower}, ' + f'{self.contrast_upper}), ' + f'saturation_range=({self.saturation_lower}, ' + f'{self.saturation_upper}), ' + f'hue_delta={self.hue_delta})') + return repr_str + + +@PIPELINES.register_module() +class RandomCutOut(object): + """CutOut operation. + + Randomly drop some regions of image used in + `Cutout `_. + Args: + prob (float): cutout probability. + n_holes (int | tuple[int, int]): Number of regions to be dropped. + If it is given as a list, number of holes will be randomly + selected from the closed interval [`n_holes[0]`, `n_holes[1]`]. + cutout_shape (tuple[int, int] | list[tuple[int, int]]): The candidate + shape of dropped regions. It can be `tuple[int, int]` to use a + fixed cutout shape, or `list[tuple[int, int]]` to randomly choose + shape from the list. + cutout_ratio (tuple[float, float] | list[tuple[float, float]]): The + candidate ratio of dropped regions. It can be `tuple[float, float]` + to use a fixed ratio or `list[tuple[float, float]]` to randomly + choose ratio from the list. Please note that `cutout_shape` + and `cutout_ratio` cannot be both given at the same time. + fill_in (tuple[float, float, float] | tuple[int, int, int]): The value + of pixel to fill in the dropped regions. Default: (0, 0, 0). 
+ seg_fill_in (int): The labels of pixel to fill in the dropped regions. + If seg_fill_in is None, skip. Default: None. + """ + + def __init__(self, + prob, + n_holes, + cutout_shape=None, + cutout_ratio=None, + fill_in=(0, 0, 0), + seg_fill_in=None): + + assert 0 <= prob and prob <= 1 + assert (cutout_shape is None) ^ (cutout_ratio is None), \ + 'Either cutout_shape or cutout_ratio should be specified.' + assert (isinstance(cutout_shape, (list, tuple)) + or isinstance(cutout_ratio, (list, tuple))) + if isinstance(n_holes, tuple): + assert len(n_holes) == 2 and 0 <= n_holes[0] < n_holes[1] + else: + n_holes = (n_holes, n_holes) + if seg_fill_in is not None: + assert (isinstance(seg_fill_in, int) and 0 <= seg_fill_in + and seg_fill_in <= 255) + self.prob = prob + self.n_holes = n_holes + self.fill_in = fill_in + self.seg_fill_in = seg_fill_in + self.with_ratio = cutout_ratio is not None + self.candidates = cutout_ratio if self.with_ratio else cutout_shape + if not isinstance(self.candidates, list): + self.candidates = [self.candidates] + + def __call__(self, results): + """Call function to drop some regions of image.""" + cutout = True if np.random.rand() < self.prob else False + if cutout: + h, w, c = results['img'].shape + n_holes = np.random.randint(self.n_holes[0], self.n_holes[1] + 1) + for _ in range(n_holes): + x1 = np.random.randint(0, w) + y1 = np.random.randint(0, h) + index = np.random.randint(0, len(self.candidates)) + if not self.with_ratio: + cutout_w, cutout_h = self.candidates[index] + else: + cutout_w = int(self.candidates[index][0] * w) + cutout_h = int(self.candidates[index][1] * h) + + x2 = np.clip(x1 + cutout_w, 0, w) + y2 = np.clip(y1 + cutout_h, 0, h) + results['img'][y1:y2, x1:x2, :] = self.fill_in + + if self.seg_fill_in is not None: + for key in results.get('seg_fields', []): + results[key][y1:y2, x1:x2] = self.seg_fill_in + + return results + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(prob={self.prob}, ' + repr_str += f'n_holes={self.n_holes}, ' + repr_str += (f'cutout_ratio={self.candidates}, ' if self.with_ratio + else f'cutout_shape={self.candidates}, ') + repr_str += f'fill_in={self.fill_in}, ' + repr_str += f'seg_fill_in={self.seg_fill_in})' + return repr_str + + +@PIPELINES.register_module() +class RandomMosaic(object): + """Mosaic augmentation. Given 4 images, mosaic transform combines them into + one output image. The output image is composed of the parts from each sub- + image. + + .. code:: text + + mosaic transform + center_x + +------------------------------+ + | pad | pad | + | +-----------+ | + | | | | + | | image1 |--------+ | + | | | | | + | | | image2 | | + center_y |----+-------------+-----------| + | | cropped | | + |pad | image3 | image4 | + | | | | + +----|-------------+-----------+ + | | + +-------------+ + + The mosaic transform steps are as follows: + 1. Choose the mosaic center as the intersections of 4 images + 2. Get the left top image according to the index, and randomly + sample another 3 images from the custom dataset. + 3. Sub image will be cropped if image is larger than mosaic patch + + Args: + prob (float): mosaic probability. + img_scale (Sequence[int]): Image size after mosaic pipeline of + a single image. The size of the output image is four times + that of a single image. The output image comprises 4 single images. + Default: (640, 640). + center_ratio_range (Sequence[float]): Center ratio range of mosaic + output. Default: (0.5, 1.5). + pad_val (int): Pad value. Default: 0. 
+ seg_pad_val (int): Pad value of segmentation map. Default: 255. + """ + + def __init__(self, + prob, + img_scale=(640, 640), + center_ratio_range=(0.5, 1.5), + pad_val=0, + seg_pad_val=255): + assert 0 <= prob and prob <= 1 + assert isinstance(img_scale, tuple) + self.prob = prob + self.img_scale = img_scale + self.center_ratio_range = center_ratio_range + self.pad_val = pad_val + self.seg_pad_val = seg_pad_val + + def __call__(self, results): + """Call function to make a mosaic of image. + + Args: + results (dict): Result dict. + + Returns: + dict: Result dict with mosaic transformed. + """ + mosaic = True if np.random.rand() < self.prob else False + if mosaic: + results = self._mosaic_transform_img(results) + results = self._mosaic_transform_seg(results) + return results + + def get_indexes(self, dataset): + """Call function to collect indexes. + + Args: + dataset (:obj:`MultiImageMixDataset`): The dataset. + + Returns: + list: indexes. + """ + + indexes = [random.randint(0, len(dataset)) for _ in range(3)] + return indexes + + def _mosaic_transform_img(self, results): + """Mosaic transform function. + + Args: + results (dict): Result dict. + + Returns: + dict: Updated result dict. + """ + + assert 'mix_results' in results + if len(results['img'].shape) == 3: + mosaic_img = np.full( + (int(self.img_scale[0] * 2), int(self.img_scale[1] * 2), 3), + self.pad_val, + dtype=results['img'].dtype) + else: + mosaic_img = np.full( + (int(self.img_scale[0] * 2), int(self.img_scale[1] * 2)), + self.pad_val, + dtype=results['img'].dtype) + + # mosaic center x, y + self.center_x = int( + random.uniform(*self.center_ratio_range) * self.img_scale[1]) + self.center_y = int( + random.uniform(*self.center_ratio_range) * self.img_scale[0]) + center_position = (self.center_x, self.center_y) + + loc_strs = ('top_left', 'top_right', 'bottom_left', 'bottom_right') + for i, loc in enumerate(loc_strs): + if loc == 'top_left': + result_patch = copy.deepcopy(results) + else: + result_patch = copy.deepcopy(results['mix_results'][i - 1]) + + img_i = result_patch['img'] + h_i, w_i = img_i.shape[:2] + # keep_ratio resize + scale_ratio_i = min(self.img_scale[0] / h_i, + self.img_scale[1] / w_i) + img_i = mmcv.imresize( + img_i, (int(w_i * scale_ratio_i), int(h_i * scale_ratio_i))) + + # compute the combine parameters + paste_coord, crop_coord = self._mosaic_combine( + loc, center_position, img_i.shape[:2][::-1]) + x1_p, y1_p, x2_p, y2_p = paste_coord + x1_c, y1_c, x2_c, y2_c = crop_coord + + # crop and paste image + mosaic_img[y1_p:y2_p, x1_p:x2_p] = img_i[y1_c:y2_c, x1_c:x2_c] + + results['img'] = mosaic_img + results['img_shape'] = mosaic_img.shape + results['ori_shape'] = mosaic_img.shape + + return results + + def _mosaic_transform_seg(self, results): + """Mosaic transform function for label annotations. + + Args: + results (dict): Result dict. + + Returns: + dict: Updated result dict. 
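+
+        Note that, unlike the image branch above, every segmentation map is
+        resized with ``interpolation='nearest'`` and pasted onto a canvas
+        pre-filled with ``seg_pad_val``, so the transform never introduces
+        label values that do not exist in the annotation.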
+ """ + + assert 'mix_results' in results + for key in results.get('seg_fields', []): + mosaic_seg = np.full( + (int(self.img_scale[0] * 2), int(self.img_scale[1] * 2)), + self.seg_pad_val, + dtype=results[key].dtype) + + # mosaic center x, y + center_position = (self.center_x, self.center_y) + + loc_strs = ('top_left', 'top_right', 'bottom_left', 'bottom_right') + for i, loc in enumerate(loc_strs): + if loc == 'top_left': + result_patch = copy.deepcopy(results) + else: + result_patch = copy.deepcopy(results['mix_results'][i - 1]) + + gt_seg_i = result_patch[key] + h_i, w_i = gt_seg_i.shape[:2] + # keep_ratio resize + scale_ratio_i = min(self.img_scale[0] / h_i, + self.img_scale[1] / w_i) + gt_seg_i = mmcv.imresize( + gt_seg_i, + (int(w_i * scale_ratio_i), int(h_i * scale_ratio_i)), + interpolation='nearest') + + # compute the combine parameters + paste_coord, crop_coord = self._mosaic_combine( + loc, center_position, gt_seg_i.shape[:2][::-1]) + x1_p, y1_p, x2_p, y2_p = paste_coord + x1_c, y1_c, x2_c, y2_c = crop_coord + + # crop and paste image + mosaic_seg[y1_p:y2_p, x1_p:x2_p] = gt_seg_i[y1_c:y2_c, + x1_c:x2_c] + + results[key] = mosaic_seg + + return results + + def _mosaic_combine(self, loc, center_position_xy, img_shape_wh): + """Calculate global coordinate of mosaic image and local coordinate of + cropped sub-image. + + Args: + loc (str): Index for the sub-image, loc in ('top_left', + 'top_right', 'bottom_left', 'bottom_right'). + center_position_xy (Sequence[float]): Mixing center for 4 images, + (x, y). + img_shape_wh (Sequence[int]): Width and height of sub-image + + Returns: + tuple[tuple[float]]: Corresponding coordinate of pasting and + cropping + - paste_coord (tuple): paste corner coordinate in mosaic image. + - crop_coord (tuple): crop corner coordinate in mosaic image. 
+ """ + + assert loc in ('top_left', 'top_right', 'bottom_left', 'bottom_right') + if loc == 'top_left': + # index0 to top left part of image + x1, y1, x2, y2 = max(center_position_xy[0] - img_shape_wh[0], 0), \ + max(center_position_xy[1] - img_shape_wh[1], 0), \ + center_position_xy[0], \ + center_position_xy[1] + crop_coord = img_shape_wh[0] - (x2 - x1), img_shape_wh[1] - ( + y2 - y1), img_shape_wh[0], img_shape_wh[1] + + elif loc == 'top_right': + # index1 to top right part of image + x1, y1, x2, y2 = center_position_xy[0], \ + max(center_position_xy[1] - img_shape_wh[1], 0), \ + min(center_position_xy[0] + img_shape_wh[0], + self.img_scale[1] * 2), \ + center_position_xy[1] + crop_coord = 0, img_shape_wh[1] - (y2 - y1), min( + img_shape_wh[0], x2 - x1), img_shape_wh[1] + + elif loc == 'bottom_left': + # index2 to bottom left part of image + x1, y1, x2, y2 = max(center_position_xy[0] - img_shape_wh[0], 0), \ + center_position_xy[1], \ + center_position_xy[0], \ + min(self.img_scale[0] * 2, center_position_xy[1] + + img_shape_wh[1]) + crop_coord = img_shape_wh[0] - (x2 - x1), 0, img_shape_wh[0], min( + y2 - y1, img_shape_wh[1]) + + else: + # index3 to bottom right part of image + x1, y1, x2, y2 = center_position_xy[0], \ + center_position_xy[1], \ + min(center_position_xy[0] + img_shape_wh[0], + self.img_scale[1] * 2), \ + min(self.img_scale[0] * 2, center_position_xy[1] + + img_shape_wh[1]) + crop_coord = 0, 0, min(img_shape_wh[0], + x2 - x1), min(y2 - y1, img_shape_wh[1]) + + paste_coord = x1, y1, x2, y2 + return paste_coord, crop_coord + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(prob={self.prob}, ' + repr_str += f'img_scale={self.img_scale}, ' + repr_str += f'center_ratio_range={self.center_ratio_range}, ' + repr_str += f'pad_val={self.pad_val}, ' + repr_str += f'seg_pad_val={self.pad_val})' + return repr_str diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/potsdam.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/potsdam.py new file mode 100644 index 0000000..2986b8f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/potsdam.py @@ -0,0 +1,25 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class PotsdamDataset(CustomDataset): + """ISPRS Potsdam dataset. + + In segmentation map annotation for Potsdam dataset, 0 is the ignore index. + ``reduce_zero_label`` should be set to True. The ``img_suffix`` and + ``seg_map_suffix`` are both fixed to '.png'. + """ + CLASSES = ('impervious_surface', 'building', 'low_vegetation', 'tree', + 'car', 'clutter') + + PALETTE = [[255, 255, 255], [0, 0, 255], [0, 255, 255], [0, 255, 0], + [255, 255, 0], [255, 0, 0]] + + def __init__(self, **kwargs): + super(PotsdamDataset, self).__init__( + img_suffix='.png', + seg_map_suffix='.png', + reduce_zero_label=True, + **kwargs) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/samplers/__init__.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/samplers/__init__.py new file mode 100644 index 0000000..da09eff --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/samplers/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from .distributed_sampler import DistributedSampler + +__all__ = ['DistributedSampler'] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/samplers/distributed_sampler.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/samplers/distributed_sampler.py new file mode 100644 index 0000000..4f9bf35 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/samplers/distributed_sampler.py @@ -0,0 +1,73 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from __future__ import division +from typing import Iterator, Optional + +import torch +from torch.utils.data import Dataset +from torch.utils.data import DistributedSampler as _DistributedSampler + +from mmseg.core.utils import sync_random_seed +from mmseg.utils import get_device + + +class DistributedSampler(_DistributedSampler): + """DistributedSampler inheriting from + `torch.utils.data.DistributedSampler`. + + Args: + datasets (Dataset): the dataset will be loaded. + num_replicas (int, optional): Number of processes participating in + distributed training. By default, world_size is retrieved from the + current distributed group. + rank (int, optional): Rank of the current process within num_replicas. + By default, rank is retrieved from the current distributed group. + shuffle (bool): If True (default), sampler will shuffle the indices. + seed (int): random seed used to shuffle the sampler if + :attr:`shuffle=True`. This number should be identical across all + processes in the distributed group. Default: ``0``. + """ + + def __init__(self, + dataset: Dataset, + num_replicas: Optional[int] = None, + rank: Optional[int] = None, + shuffle: bool = True, + seed=0) -> None: + super().__init__( + dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle) + + # In distributed sampling, different ranks should sample + # non-overlapped data in the dataset. Therefore, this function + # is used to make sure that each rank shuffles the data indices + # in the same order based on the same seed. Then different ranks + # could use different indices to select non-overlapped data from the + # same data list. + device = get_device() + self.seed = sync_random_seed(seed, device) + + def __iter__(self) -> Iterator: + """ + Yields: + Iterator: iterator of indices for rank. + """ + # deterministically shuffle based on epoch + if self.shuffle: + g = torch.Generator() + # When :attr:`shuffle=True`, this ensures all replicas + # use a different random ordering for each epoch. + # Otherwise, the next iteration of this sampler will + # yield the same ordering. + g.manual_seed(self.epoch + self.seed) + indices = torch.randperm(len(self.dataset), generator=g).tolist() + else: + indices = torch.arange(len(self.dataset)).tolist() + + # add extra samples to make it evenly divisible + indices += indices[:(self.total_size - len(indices))] + assert len(indices) == self.total_size + + # subsample + indices = indices[self.rank:self.total_size:self.num_replicas] + assert len(indices) == self.num_samples + + return iter(indices) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/stare.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/stare.py new file mode 100644 index 0000000..a24d1d9 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/stare.py @@ -0,0 +1,28 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import os.path as osp + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class STAREDataset(CustomDataset): + """STARE dataset. + + In segmentation map annotation for STARE, 0 stands for background, which is + included in 2 categories. ``reduce_zero_label`` is fixed to False. The + ``img_suffix`` is fixed to '.png' and ``seg_map_suffix`` is fixed to + '.ah.png'. + """ + + CLASSES = ('background', 'vessel') + + PALETTE = [[120, 120, 120], [6, 230, 230]] + + def __init__(self, **kwargs): + super(STAREDataset, self).__init__( + img_suffix='.png', + seg_map_suffix='.ah.png', + reduce_zero_label=False, + **kwargs) + assert osp.exists(self.img_dir) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/voc.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/voc.py new file mode 100644 index 0000000..3cec9e3 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/datasets/voc.py @@ -0,0 +1,30 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp + +from .builder import DATASETS +from .custom import CustomDataset + + +@DATASETS.register_module() +class PascalVOCDataset(CustomDataset): + """Pascal VOC dataset. + + Args: + split (str): Split txt file for Pascal VOC. + """ + + CLASSES = ('background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', + 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', + 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', + 'train', 'tvmonitor') + + PALETTE = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], [0, 0, 128], + [128, 0, 128], [0, 128, 128], [128, 128, 128], [64, 0, 0], + [192, 0, 0], [64, 128, 0], [192, 128, 0], [64, 0, 128], + [192, 0, 128], [64, 128, 128], [192, 128, 128], [0, 64, 0], + [128, 64, 0], [0, 192, 0], [128, 192, 0], [0, 64, 128]] + + def __init__(self, split, **kwargs): + super(PascalVOCDataset, self).__init__( + img_suffix='.jpg', seg_map_suffix='.png', split=split, **kwargs) + assert osp.exists(self.img_dir) and self.split is not None diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/__init__.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/__init__.py new file mode 100644 index 0000000..87d8108 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .backbones import * # noqa: F401,F403 +from .builder import (BACKBONES, HEADS, LOSSES, SEGMENTORS, build_backbone, + build_head, build_loss, build_segmentor) +from .decode_heads import * # noqa: F401,F403 +from .losses import * # noqa: F401,F403 +from .necks import * # noqa: F401,F403 +from .segmentors import * # noqa: F401,F403 + +__all__ = [ + 'BACKBONES', 'HEADS', 'LOSSES', 'SEGMENTORS', 'build_backbone', + 'build_head', 'build_loss', 'build_segmentor' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/__init__.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/__init__.py new file mode 100644 index 0000000..bda42bb --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/__init__.py @@ -0,0 +1,30 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
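+# Importing a backbone module below executes its
+# ``@BACKBONES.register_module()`` decorator, which is what makes the class
+# available to config files by name (e.g. ``type='BEiT'``).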
+from .beit import BEiT
+from .bisenetv1 import BiSeNetV1
+from .bisenetv2 import BiSeNetV2
+from .cgnet import CGNet
+from .erfnet import ERFNet
+from .fast_scnn import FastSCNN
+from .hrnet import HRNet
+from .icnet import ICNet
+from .mae import MAE
+from .mit import MixVisionTransformer
+from .mobilenet_v2 import MobileNetV2
+from .mobilenet_v3 import MobileNetV3
+from .resnest import ResNeSt
+from .resnet import ResNet, ResNetV1c, ResNetV1d
+from .resnext import ResNeXt
+from .stdc import STDCContextPathNet, STDCNet
+from .swin import SwinTransformer
+from .timm_backbone import TIMMBackbone
+from .twins import PCPVT, SVT
+from .unet import UNet
+from .vit import VisionTransformer
+
+__all__ = [
+    'ResNet', 'ResNetV1c', 'ResNetV1d', 'ResNeXt', 'HRNet', 'FastSCNN',
+    'ResNeSt', 'MobileNetV2', 'UNet', 'CGNet', 'MobileNetV3',
+    'VisionTransformer', 'SwinTransformer', 'MixVisionTransformer',
+    'BiSeNetV1', 'BiSeNetV2', 'ICNet', 'TIMMBackbone', 'ERFNet', 'PCPVT',
+    'SVT', 'STDCNet', 'STDCContextPathNet', 'BEiT', 'MAE'
+]
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/beit.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/beit.py
new file mode 100644
index 0000000..fade601
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/beit.py
@@ -0,0 +1,559 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import build_norm_layer
+from mmcv.cnn.bricks.drop import build_dropout
+from mmcv.cnn.utils.weight_init import (constant_init, kaiming_init,
+                                        trunc_normal_)
+from mmcv.runner import BaseModule, ModuleList, _load_checkpoint
+from torch.nn.modules.batchnorm import _BatchNorm
+from torch.nn.modules.utils import _pair as to_2tuple
+
+from mmseg.utils import get_root_logger
+from ..builder import BACKBONES
+from ..utils import PatchEmbed
+from .vit import TransformerEncoderLayer as VisionTransformerEncoderLayer
+
+try:
+    from scipy import interpolate
+except ImportError:
+    interpolate = None
+
+
+class BEiTAttention(BaseModule):
+    """Window based multi-head self-attention (W-MSA) module with relative
+    position bias.
+
+    Args:
+        embed_dims (int): Number of input channels.
+        num_heads (int): Number of attention heads.
+        window_size (tuple[int]): The height and width of the window.
+        bias (bool | str): The option to add a learnable bias for q, k, v.
+            If bias is True, a learnable bias is added for q, k and v; if
+            bias is 'qv_bias', a learnable bias is added only for q and v;
+            if bias is False, no bias is added. Default: 'qv_bias'.
+        qk_scale (float | None, optional): Override default qk scale of
+            head_dim ** -0.5 if set. Default: None.
+        attn_drop_rate (float): Dropout ratio of attention weight.
+            Default: 0.0
+        proj_drop_rate (float): Dropout ratio of output. Default: 0.
+        init_cfg (dict | None, optional): The Config for initialization.
+            Default: None.
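+
+    Note: the relative position bias table built in
+    ``_init_rel_pos_embedding`` below holds
+    ``(2 * Wh - 1) * (2 * Ww - 1) + 3`` entries per head; for a 14x14
+    window that is ``27 * 27 + 3 = 732``, the ``+ 3`` covering the
+    cls-to-token, token-to-cls and cls-to-cls positions.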
+ """ + + def __init__(self, + embed_dims, + num_heads, + window_size, + bias='qv_bias', + qk_scale=None, + attn_drop_rate=0., + proj_drop_rate=0., + init_cfg=None, + **kwargs): + super().__init__(init_cfg=init_cfg) + self.embed_dims = embed_dims + self.num_heads = num_heads + head_embed_dims = embed_dims // num_heads + self.bias = bias + self.scale = qk_scale or head_embed_dims**-0.5 + + qkv_bias = bias + if bias == 'qv_bias': + self._init_qv_bias() + qkv_bias = False + + self.window_size = window_size + self._init_rel_pos_embedding() + + self.qkv = nn.Linear(embed_dims, embed_dims * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop_rate) + self.proj = nn.Linear(embed_dims, embed_dims) + self.proj_drop = nn.Dropout(proj_drop_rate) + + def _init_qv_bias(self): + self.q_bias = nn.Parameter(torch.zeros(self.embed_dims)) + self.v_bias = nn.Parameter(torch.zeros(self.embed_dims)) + + def _init_rel_pos_embedding(self): + Wh, Ww = self.window_size + # cls to token & token 2 cls & cls to cls + self.num_relative_distance = (2 * Wh - 1) * (2 * Ww - 1) + 3 + # relative_position_bias_table shape is (2*Wh-1 * 2*Ww-1 + 3, nH) + self.relative_position_bias_table = nn.Parameter( + torch.zeros(self.num_relative_distance, self.num_heads)) + + # get pair-wise relative position index for + # each token inside the window + coords_h = torch.arange(Wh) + coords_w = torch.arange(Ww) + # coords shape is (2, Wh, Ww) + coords = torch.stack(torch.meshgrid([coords_h, coords_w])) + # coords_flatten shape is (2, Wh*Ww) + coords_flatten = torch.flatten(coords, 1) + relative_coords = ( + coords_flatten[:, :, None] - coords_flatten[:, None, :]) + # relative_coords shape is (Wh*Ww, Wh*Ww, 2) + relative_coords = relative_coords.permute(1, 2, 0).contiguous() + # shift to start from 0 + relative_coords[:, :, 0] += Wh - 1 + relative_coords[:, :, 1] += Ww - 1 + relative_coords[:, :, 0] *= 2 * Ww - 1 + relative_position_index = torch.zeros( + size=(Wh * Ww + 1, ) * 2, dtype=relative_coords.dtype) + # relative_position_index shape is (Wh*Ww, Wh*Ww) + relative_position_index[1:, 1:] = relative_coords.sum(-1) + relative_position_index[0, 0:] = self.num_relative_distance - 3 + relative_position_index[0:, 0] = self.num_relative_distance - 2 + relative_position_index[0, 0] = self.num_relative_distance - 1 + + self.register_buffer('relative_position_index', + relative_position_index) + + def init_weights(self): + trunc_normal_(self.relative_position_bias_table, std=0.02) + + def forward(self, x): + """ + Args: + x (tensor): input features with shape of (num_windows*B, N, C). 
+ """ + B, N, C = x.shape + + if self.bias == 'qv_bias': + k_bias = torch.zeros_like(self.v_bias, requires_grad=False) + qkv_bias = torch.cat((self.q_bias, k_bias, self.v_bias)) + qkv = F.linear(input=x, weight=self.qkv.weight, bias=qkv_bias) + else: + qkv = self.qkv(x) + + qkv = qkv.reshape(B, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) + q, k, v = qkv[0], qkv[1], qkv[2] + q = q * self.scale + attn = (q @ k.transpose(-2, -1)) + if self.relative_position_bias_table is not None: + Wh = self.window_size[0] + Ww = self.window_size[1] + relative_position_bias = self.relative_position_bias_table[ + self.relative_position_index.view(-1)].view( + Wh * Ww + 1, Wh * Ww + 1, -1) + relative_position_bias = relative_position_bias.permute( + 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww + attn = attn + relative_position_bias.unsqueeze(0) + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + x = (attn @ v).transpose(1, 2).reshape(B, N, C) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class BEiTTransformerEncoderLayer(VisionTransformerEncoderLayer): + """Implements one encoder layer in Vision Transformer. + + Args: + embed_dims (int): The feature dimension. + num_heads (int): Parallel attention heads. + feedforward_channels (int): The hidden dimension for FFNs. + attn_drop_rate (float): The drop out rate for attention layer. + Default: 0.0. + drop_path_rate (float): Stochastic depth rate. Default 0.0. + num_fcs (int): The number of fully-connected layers for FFNs. + Default: 2. + bias (bool): The option to add leanable bias for q, k, v. If bias is + True, it will add leanable bias. If bias is 'qv_bias', it will only + add leanable bias for q, v. If bias is False, it will not add bias + for q, k, v. Default to 'qv_bias'. + act_cfg (dict): The activation config for FFNs. + Default: dict(type='GELU'). + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN'). + window_size (tuple[int], optional): The height and width of the window. + Default: None. + init_values (float, optional): Initialize the values of BEiTAttention + and FFN with learnable scaling. Default: None. 
+ """ + + def __init__(self, + embed_dims, + num_heads, + feedforward_channels, + attn_drop_rate=0., + drop_path_rate=0., + num_fcs=2, + bias='qv_bias', + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='LN'), + window_size=None, + attn_cfg=dict(), + ffn_cfg=dict(add_identity=False), + init_values=None): + attn_cfg.update(dict(window_size=window_size, qk_scale=None)) + + super(BEiTTransformerEncoderLayer, self).__init__( + embed_dims=embed_dims, + num_heads=num_heads, + feedforward_channels=feedforward_channels, + attn_drop_rate=attn_drop_rate, + drop_path_rate=0., + drop_rate=0., + num_fcs=num_fcs, + qkv_bias=bias, + act_cfg=act_cfg, + norm_cfg=norm_cfg, + attn_cfg=attn_cfg, + ffn_cfg=ffn_cfg) + + # NOTE: drop path for stochastic depth, we shall see if + # this is better than dropout here + dropout_layer = dict(type='DropPath', drop_prob=drop_path_rate) + self.drop_path = build_dropout( + dropout_layer) if dropout_layer else nn.Identity() + self.gamma_1 = nn.Parameter( + init_values * torch.ones((embed_dims)), requires_grad=True) + self.gamma_2 = nn.Parameter( + init_values * torch.ones((embed_dims)), requires_grad=True) + + def build_attn(self, attn_cfg): + self.attn = BEiTAttention(**attn_cfg) + + def forward(self, x): + x = x + self.drop_path(self.gamma_1 * self.attn(self.norm1(x))) + x = x + self.drop_path(self.gamma_2 * self.ffn(self.norm2(x))) + return x + + +@BACKBONES.register_module() +class BEiT(BaseModule): + """BERT Pre-Training of Image Transformers. + + Args: + img_size (int | tuple): Input image size. Default: 224. + patch_size (int): The patch size. Default: 16. + in_channels (int): Number of input channels. Default: 3. + embed_dims (int): Embedding dimension. Default: 768. + num_layers (int): Depth of transformer. Default: 12. + num_heads (int): Number of attention heads. Default: 12. + mlp_ratio (int): Ratio of mlp hidden dim to embedding dim. + Default: 4. + out_indices (list | tuple | int): Output from which stages. + Default: -1. + qv_bias (bool): Enable bias for qv if True. Default: True. + attn_drop_rate (float): The drop out rate for attention layer. + Default 0.0 + drop_path_rate (float): Stochastic depth rate. Default 0.0. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN') + act_cfg (dict): The activation config for FFNs. + Default: dict(type='GELU'). + patch_norm (bool): Whether to add a norm in PatchEmbed Block. + Default: False. + final_norm (bool): Whether to add a additional layer to normalize + final feature map. Default: False. + num_fcs (int): The number of fully-connected layers for FFNs. + Default: 2. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. + pretrained (str, optional): Model pretrained path. Default: None. + init_values (float): Initialize the values of BEiTAttention and FFN + with learnable scaling. + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. 
+ """ + + def __init__(self, + img_size=224, + patch_size=16, + in_channels=3, + embed_dims=768, + num_layers=12, + num_heads=12, + mlp_ratio=4, + out_indices=-1, + qv_bias=True, + attn_drop_rate=0., + drop_path_rate=0., + norm_cfg=dict(type='LN'), + act_cfg=dict(type='GELU'), + patch_norm=False, + final_norm=False, + num_fcs=2, + norm_eval=False, + pretrained=None, + init_values=0.1, + init_cfg=None): + super(BEiT, self).__init__(init_cfg=init_cfg) + if isinstance(img_size, int): + img_size = to_2tuple(img_size) + elif isinstance(img_size, tuple): + if len(img_size) == 1: + img_size = to_2tuple(img_size[0]) + assert len(img_size) == 2, \ + f'The size of image should have length 1 or 2, ' \ + f'but got {len(img_size)}' + + assert not (init_cfg and pretrained), \ + 'init_cfg and pretrained cannot be set at the same time' + if isinstance(pretrained, str): + warnings.warn('DeprecationWarning: pretrained is deprecated, ' + 'please use "init_cfg" instead') + self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) + elif pretrained is not None: + raise TypeError('pretrained must be a str or None') + + self.in_channels = in_channels + self.img_size = img_size + self.patch_size = patch_size + self.norm_eval = norm_eval + self.pretrained = pretrained + self.num_layers = num_layers + self.embed_dims = embed_dims + self.num_heads = num_heads + self.mlp_ratio = mlp_ratio + self.attn_drop_rate = attn_drop_rate + self.drop_path_rate = drop_path_rate + self.num_fcs = num_fcs + self.qv_bias = qv_bias + self.act_cfg = act_cfg + self.norm_cfg = norm_cfg + self.patch_norm = patch_norm + self.init_values = init_values + self.window_size = (img_size[0] // patch_size, + img_size[1] // patch_size) + self.patch_shape = self.window_size + self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dims)) + + self._build_patch_embedding() + self._build_layers() + + if isinstance(out_indices, int): + if out_indices == -1: + out_indices = num_layers - 1 + self.out_indices = [out_indices] + elif isinstance(out_indices, list) or isinstance(out_indices, tuple): + self.out_indices = out_indices + else: + raise TypeError('out_indices must be type of int, list or tuple') + + self.final_norm = final_norm + if final_norm: + self.norm1_name, norm1 = build_norm_layer( + norm_cfg, embed_dims, postfix=1) + self.add_module(self.norm1_name, norm1) + + def _build_patch_embedding(self): + """Build patch embedding layer.""" + self.patch_embed = PatchEmbed( + in_channels=self.in_channels, + embed_dims=self.embed_dims, + conv_type='Conv2d', + kernel_size=self.patch_size, + stride=self.patch_size, + padding=0, + norm_cfg=self.norm_cfg if self.patch_norm else None, + init_cfg=None) + + def _build_layers(self): + """Build transformer encoding layers.""" + + dpr = [ + x.item() + for x in torch.linspace(0, self.drop_path_rate, self.num_layers) + ] + self.layers = ModuleList() + for i in range(self.num_layers): + self.layers.append( + BEiTTransformerEncoderLayer( + embed_dims=self.embed_dims, + num_heads=self.num_heads, + feedforward_channels=self.mlp_ratio * self.embed_dims, + attn_drop_rate=self.attn_drop_rate, + drop_path_rate=dpr[i], + num_fcs=self.num_fcs, + bias='qv_bias' if self.qv_bias else False, + act_cfg=self.act_cfg, + norm_cfg=self.norm_cfg, + window_size=self.window_size, + init_values=self.init_values)) + + @property + def norm1(self): + return getattr(self, self.norm1_name) + + def _geometric_sequence_interpolation(self, src_size, dst_size, sequence, + num): + """Get new sequence via geometric sequence interpolation. 
+ + Args: + src_size (int): Pos_embedding size in pre-trained model. + dst_size (int): Pos_embedding size in the current model. + sequence (tensor): The relative position bias of the pretrain + model after removing the extra tokens. + num (int): Number of attention heads. + Returns: + new_sequence (tensor): Geometric sequence interpolate the + pre-trained relative position bias to the size of + the current model. + """ + + def geometric_progression(a, r, n): + return a * (1.0 - r**n) / (1.0 - r) + + # Here is a binary function. + left, right = 1.01, 1.5 + while right - left > 1e-6: + q = (left + right) / 2.0 + gp = geometric_progression(1, q, src_size // 2) + if gp > dst_size // 2: + right = q + else: + left = q + # The position of each interpolated point is determined + # by the ratio obtained by dichotomy. + dis = [] + cur = 1 + for i in range(src_size // 2): + dis.append(cur) + cur += q**(i + 1) + r_ids = [-_ for _ in reversed(dis)] + x = r_ids + [0] + dis + y = r_ids + [0] + dis + t = dst_size // 2.0 + dx = np.arange(-t, t + 0.1, 1.0) + dy = np.arange(-t, t + 0.1, 1.0) + # Interpolation functions are being executed and called. + new_sequence = [] + for i in range(num): + z = sequence[:, i].view(src_size, src_size).float().numpy() + f = interpolate.interp2d(x, y, z, kind='cubic') + new_sequence.append( + torch.Tensor(f(dx, dy)).contiguous().view(-1, 1).to(sequence)) + new_sequence = torch.cat(new_sequence, dim=-1) + return new_sequence + + def resize_rel_pos_embed(self, checkpoint): + """Resize relative pos_embed weights. + + This function is modified from + https://github.com/microsoft/unilm/blob/master/beit/semantic_segmentation/mmcv_custom/checkpoint.py. # noqa: E501 + Copyright (c) Microsoft Corporation + Licensed under the MIT License + Args: + checkpoint (dict): Key and value of the pretrain model. + Returns: + state_dict (dict): Interpolate the relative pos_embed weights + in the pre-train model to the current model size. + """ + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + else: + state_dict = checkpoint + + all_keys = list(state_dict.keys()) + for key in all_keys: + if 'relative_position_index' in key: + state_dict.pop(key) + # In order to keep the center of pos_bias as consistent as + # possible after interpolation, and vice versa in the edge + # area, the geometric sequence interpolation method is adopted. + if 'relative_position_bias_table' in key: + rel_pos_bias = state_dict[key] + src_num_pos, num_attn_heads = rel_pos_bias.size() + dst_num_pos, _ = self.state_dict()[key].size() + dst_patch_shape = self.patch_shape + if dst_patch_shape[0] != dst_patch_shape[1]: + raise NotImplementedError() + # Count the number of extra tokens. 
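+                # (there are always 3 extra entries: the cls-to-token,
+                # token-to-cls and cls-to-cls biases created in
+                # ``_init_rel_pos_embedding``; they are split off before the
+                # spatial part is interpolated and re-appended afterwards)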
+ num_extra_tokens = dst_num_pos - ( + dst_patch_shape[0] * 2 - 1) * ( + dst_patch_shape[1] * 2 - 1) + src_size = int((src_num_pos - num_extra_tokens)**0.5) + dst_size = int((dst_num_pos - num_extra_tokens)**0.5) + if src_size != dst_size: + extra_tokens = rel_pos_bias[-num_extra_tokens:, :] + rel_pos_bias = rel_pos_bias[:-num_extra_tokens, :] + new_rel_pos_bias = self._geometric_sequence_interpolation( + src_size, dst_size, rel_pos_bias, num_attn_heads) + new_rel_pos_bias = torch.cat( + (new_rel_pos_bias, extra_tokens), dim=0) + state_dict[key] = new_rel_pos_bias + + return state_dict + + def init_weights(self): + + def _init_weights(m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + self.apply(_init_weights) + + if (isinstance(self.init_cfg, dict) + and self.init_cfg.get('type') == 'Pretrained'): + logger = get_root_logger() + checkpoint = _load_checkpoint( + self.init_cfg['checkpoint'], logger=logger, map_location='cpu') + state_dict = self.resize_rel_pos_embed(checkpoint) + self.load_state_dict(state_dict, False) + elif self.init_cfg is not None: + super(BEiT, self).init_weights() + else: + # We only implement the 'jax_impl' initialization implemented at + # https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py#L353 # noqa: E501 + # Copyright 2019 Ross Wightman + # Licensed under the Apache License, Version 2.0 (the "License") + trunc_normal_(self.cls_token, std=.02) + for n, m in self.named_modules(): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if m.bias is not None: + if 'ffn' in n: + nn.init.normal_(m.bias, mean=0., std=1e-6) + else: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.Conv2d): + kaiming_init(m, mode='fan_in', bias=0.) + elif isinstance(m, (_BatchNorm, nn.GroupNorm, nn.LayerNorm)): + constant_init(m, val=1.0, bias=0.) + + def forward(self, inputs): + B = inputs.shape[0] + + x, hw_shape = self.patch_embed(inputs) + + # stole cls_tokens impl from Phil Wang, thanks + cls_tokens = self.cls_token.expand(B, -1, -1) + x = torch.cat((cls_tokens, x), dim=1) + + outs = [] + for i, layer in enumerate(self.layers): + x = layer(x) + if i == len(self.layers) - 1: + if self.final_norm: + x = self.norm1(x) + if i in self.out_indices: + # Remove class token and reshape token for decoder head + out = x[:, 1:] + B, _, C = out.shape + out = out.reshape(B, hw_shape[0], hw_shape[1], + C).permute(0, 3, 1, 2).contiguous() + outs.append(out) + + return tuple(outs) + + def train(self, mode=True): + super(BEiT, self).train(mode) + if mode and self.norm_eval: + for m in self.modules(): + if isinstance(m, nn.LayerNorm): + m.eval() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/bisenetv1.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/bisenetv1.py new file mode 100644 index 0000000..4beb7b3 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/bisenetv1.py @@ -0,0 +1,332 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import torch +import torch.nn as nn +from mmcv.cnn import ConvModule +from mmcv.runner import BaseModule + +from mmseg.ops import resize +from ..builder import BACKBONES, build_backbone + + +class SpatialPath(BaseModule): + """Spatial Path to preserve the spatial size of the original input image + and encode affluent spatial information. + + Args: + in_channels(int): The number of channels of input + image. Default: 3. + num_channels (Tuple[int]): The number of channels of + each layers in Spatial Path. + Default: (64, 64, 64, 128). + Returns: + x (torch.Tensor): Feature map for Feature Fusion Module. + """ + + def __init__(self, + in_channels=3, + num_channels=(64, 64, 64, 128), + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + init_cfg=None): + super(SpatialPath, self).__init__(init_cfg=init_cfg) + assert len(num_channels) == 4, 'Length of input channels \ + of Spatial Path must be 4!' + + self.layers = [] + for i in range(len(num_channels)): + layer_name = f'layer{i + 1}' + self.layers.append(layer_name) + if i == 0: + self.add_module( + layer_name, + ConvModule( + in_channels=in_channels, + out_channels=num_channels[i], + kernel_size=7, + stride=2, + padding=3, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + elif i == len(num_channels) - 1: + self.add_module( + layer_name, + ConvModule( + in_channels=num_channels[i - 1], + out_channels=num_channels[i], + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + else: + self.add_module( + layer_name, + ConvModule( + in_channels=num_channels[i - 1], + out_channels=num_channels[i], + kernel_size=3, + stride=2, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + + def forward(self, x): + for i, layer_name in enumerate(self.layers): + layer_stage = getattr(self, layer_name) + x = layer_stage(x) + return x + + +class AttentionRefinementModule(BaseModule): + """Attention Refinement Module (ARM) to refine the features of each stage. + + Args: + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + Returns: + x_out (torch.Tensor): Feature map of Attention Refinement Module. + """ + + def __init__(self, + in_channels, + out_channel, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + init_cfg=None): + super(AttentionRefinementModule, self).__init__(init_cfg=init_cfg) + self.conv_layer = ConvModule( + in_channels=in_channels, + out_channels=out_channel, + kernel_size=3, + stride=1, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.atten_conv_layer = nn.Sequential( + nn.AdaptiveAvgPool2d((1, 1)), + ConvModule( + in_channels=out_channel, + out_channels=out_channel, + kernel_size=1, + bias=False, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None), nn.Sigmoid()) + + def forward(self, x): + x = self.conv_layer(x) + x_atten = self.atten_conv_layer(x) + x_out = x * x_atten + return x_out + + +class ContextPath(BaseModule): + """Context Path to provide sufficient receptive field. + + Args: + backbone_cfg:(dict): Config of backbone of + Context Path. + context_channels (Tuple[int]): The number of channel numbers + of various modules in Context Path. + Default: (128, 256, 512). + align_corners (bool, optional): The align_corners argument of + resize operation. Default: False. + Returns: + x_16_up, x_32_up (torch.Tensor, torch.Tensor): Two feature maps + undergoing upsampling from 1/16 and 1/32 downsampling + feature maps. 
These two feature maps are used for Feature
+            Fusion Module and Auxiliary Head.
+    """
+
+    def __init__(self,
+                 backbone_cfg,
+                 context_channels=(128, 256, 512),
+                 align_corners=False,
+                 conv_cfg=None,
+                 norm_cfg=dict(type='BN'),
+                 act_cfg=dict(type='ReLU'),
+                 init_cfg=None):
+        super(ContextPath, self).__init__(init_cfg=init_cfg)
+        assert len(context_channels) == 3, 'Length of input channels \
+                                           of Context Path must be 3!'
+
+        self.backbone = build_backbone(backbone_cfg)
+
+        self.align_corners = align_corners
+        self.arm16 = AttentionRefinementModule(context_channels[1],
+                                               context_channels[0])
+        self.arm32 = AttentionRefinementModule(context_channels[2],
+                                               context_channels[0])
+        self.conv_head32 = ConvModule(
+            in_channels=context_channels[0],
+            out_channels=context_channels[0],
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            conv_cfg=conv_cfg,
+            norm_cfg=norm_cfg,
+            act_cfg=act_cfg)
+        self.conv_head16 = ConvModule(
+            in_channels=context_channels[0],
+            out_channels=context_channels[0],
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            conv_cfg=conv_cfg,
+            norm_cfg=norm_cfg,
+            act_cfg=act_cfg)
+        self.gap_conv = nn.Sequential(
+            nn.AdaptiveAvgPool2d((1, 1)),
+            ConvModule(
+                in_channels=context_channels[2],
+                out_channels=context_channels[0],
+                kernel_size=1,
+                stride=1,
+                padding=0,
+                conv_cfg=conv_cfg,
+                norm_cfg=norm_cfg,
+                act_cfg=act_cfg))
+
+    def forward(self, x):
+        x_4, x_8, x_16, x_32 = self.backbone(x)
+        x_gap = self.gap_conv(x_32)
+
+        x_32_arm = self.arm32(x_32)
+        x_32_sum = x_32_arm + x_gap
+        x_32_up = resize(input=x_32_sum, size=x_16.shape[2:], mode='nearest')
+        x_32_up = self.conv_head32(x_32_up)
+
+        x_16_arm = self.arm16(x_16)
+        x_16_sum = x_16_arm + x_32_up
+        x_16_up = resize(input=x_16_sum, size=x_8.shape[2:], mode='nearest')
+        x_16_up = self.conv_head16(x_16_up)
+
+        return x_16_up, x_32_up
+
+
+class FeatureFusionModule(BaseModule):
+    """Feature Fusion Module to fuse low level output feature of Spatial Path
+    and high level output feature of Context Path.
+
+    Args:
+        in_channels (int): The number of input channels.
+        out_channels (int): The number of output channels.
+    Returns:
+        x_out (torch.Tensor): Feature map of Feature Fusion Module.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 conv_cfg=None,
+                 norm_cfg=dict(type='BN'),
+                 act_cfg=dict(type='ReLU'),
+                 init_cfg=None):
+        super(FeatureFusionModule, self).__init__(init_cfg=init_cfg)
+        self.conv1 = ConvModule(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            conv_cfg=conv_cfg,
+            norm_cfg=norm_cfg,
+            act_cfg=act_cfg)
+        self.gap = nn.AdaptiveAvgPool2d((1, 1))
+        self.conv_atten = nn.Sequential(
+            ConvModule(
+                in_channels=out_channels,
+                out_channels=out_channels,
+                kernel_size=1,
+                stride=1,
+                padding=0,
+                bias=False,
+                conv_cfg=conv_cfg,
+                norm_cfg=norm_cfg,
+                act_cfg=act_cfg), nn.Sigmoid())
+
+    def forward(self, x_sp, x_cp):
+        x_concat = torch.cat([x_sp, x_cp], dim=1)
+        x_fuse = self.conv1(x_concat)
+        x_atten = self.gap(x_fuse)
+        # Note: No BN and more 1x1 conv in paper.
+        x_atten = self.conv_atten(x_atten)
+        x_atten = x_fuse * x_atten
+        x_out = x_atten + x_fuse
+        return x_out
+
+
+@BACKBONES.register_module()
+class BiSeNetV1(BaseModule):
+    """BiSeNetV1 backbone.
+
+    This backbone is the implementation of `BiSeNet: Bilateral
+    Segmentation Network for Real-time Semantic
+    Segmentation <https://arxiv.org/abs/1808.00897>`_.
+
+    Args:
+        backbone_cfg (dict): Config of backbone of
+            Context Path.
+        in_channels (int): The number of channels of input
+            image. Default: 3.
+ spatial_channels (Tuple[int]): Size of channel numbers of + various layers in Spatial Path. + Default: (64, 64, 64, 128). + context_channels (Tuple[int]): Size of channel numbers of + various modules in Context Path. + Default: (128, 256, 512). + out_indices (Tuple[int] | int, optional): Output from which stages. + Default: (0, 1, 2). + align_corners (bool, optional): The align_corners argument of + resize operation in Bilateral Guided Aggregation Layer. + Default: False. + out_channels(int): The number of channels of output. + It must be the same with `in_channels` of decode_head. + Default: 256. + """ + + def __init__(self, + backbone_cfg, + in_channels=3, + spatial_channels=(64, 64, 64, 128), + context_channels=(128, 256, 512), + out_indices=(0, 1, 2), + align_corners=False, + out_channels=256, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + act_cfg=dict(type='ReLU'), + init_cfg=None): + + super(BiSeNetV1, self).__init__(init_cfg=init_cfg) + assert len(spatial_channels) == 4, 'Length of input channels \ + of Spatial Path must be 4!' + + assert len(context_channels) == 3, 'Length of input channels \ + of Context Path must be 3!' + + self.out_indices = out_indices + self.align_corners = align_corners + self.context_path = ContextPath(backbone_cfg, context_channels, + self.align_corners) + self.spatial_path = SpatialPath(in_channels, spatial_channels) + self.ffm = FeatureFusionModule(context_channels[1], out_channels) + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + + def forward(self, x): + # stole refactoring code from Coin Cheung, thanks + x_context8, x_context16 = self.context_path(x) + x_spatial = self.spatial_path(x) + x_fuse = self.ffm(x_spatial, x_context8) + + outs = [x_fuse, x_context8, x_context16] + outs = [outs[i] for i in self.out_indices] + return tuple(outs) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/bisenetv2.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/bisenetv2.py new file mode 100644 index 0000000..d908b32 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/bisenetv2.py @@ -0,0 +1,622 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from mmcv.cnn import (ConvModule, DepthwiseSeparableConvModule, + build_activation_layer, build_norm_layer) +from mmcv.runner import BaseModule + +from mmseg.ops import resize +from ..builder import BACKBONES + + +class DetailBranch(BaseModule): + """Detail Branch with wide channels and shallow layers to capture low-level + details and generate high-resolution feature representation. + + Args: + detail_channels (Tuple[int]): Size of channel numbers of each stage + in Detail Branch, in paper it has 3 stages. + Default: (64, 64, 128). + in_channels (int): Number of channels of input image. Default: 3. + conv_cfg (dict | None): Config of conv layers. + Default: None. + norm_cfg (dict | None): Config of norm layers. + Default: dict(type='BN'). + act_cfg (dict): Config of activation layers. + Default: dict(type='ReLU'). + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + Returns: + x (torch.Tensor): Feature map of Detail Branch. 
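+
+        With the default ``detail_channels=(64, 64, 128)`` each of the three
+        stages contains exactly one stride-2 conv, so an input of shape
+        ``(N, 3, H, W)`` comes out as a ``(N, 128, H / 8, W / 8)`` feature
+        map.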
+ """ + + def __init__(self, + detail_channels=(64, 64, 128), + in_channels=3, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + init_cfg=None): + super(DetailBranch, self).__init__(init_cfg=init_cfg) + detail_branch = [] + for i in range(len(detail_channels)): + if i == 0: + detail_branch.append( + nn.Sequential( + ConvModule( + in_channels=in_channels, + out_channels=detail_channels[i], + kernel_size=3, + stride=2, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg), + ConvModule( + in_channels=detail_channels[i], + out_channels=detail_channels[i], + kernel_size=3, + stride=1, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg))) + else: + detail_branch.append( + nn.Sequential( + ConvModule( + in_channels=detail_channels[i - 1], + out_channels=detail_channels[i], + kernel_size=3, + stride=2, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg), + ConvModule( + in_channels=detail_channels[i], + out_channels=detail_channels[i], + kernel_size=3, + stride=1, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg), + ConvModule( + in_channels=detail_channels[i], + out_channels=detail_channels[i], + kernel_size=3, + stride=1, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg))) + self.detail_branch = nn.ModuleList(detail_branch) + + def forward(self, x): + for stage in self.detail_branch: + x = stage(x) + return x + + +class StemBlock(BaseModule): + """Stem Block at the beginning of Semantic Branch. + + Args: + in_channels (int): Number of input channels. + Default: 3. + out_channels (int): Number of output channels. + Default: 16. + conv_cfg (dict | None): Config of conv layers. + Default: None. + norm_cfg (dict | None): Config of norm layers. + Default: dict(type='BN'). + act_cfg (dict): Config of activation layers. + Default: dict(type='ReLU'). + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + Returns: + x (torch.Tensor): First feature map in Semantic Branch. + """ + + def __init__(self, + in_channels=3, + out_channels=16, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + init_cfg=None): + super(StemBlock, self).__init__(init_cfg=init_cfg) + + self.conv_first = ConvModule( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + stride=2, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.convs = nn.Sequential( + ConvModule( + in_channels=out_channels, + out_channels=out_channels // 2, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg), + ConvModule( + in_channels=out_channels // 2, + out_channels=out_channels, + kernel_size=3, + stride=2, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + self.pool = nn.MaxPool2d( + kernel_size=3, stride=2, padding=1, ceil_mode=False) + self.fuse_last = ConvModule( + in_channels=out_channels * 2, + out_channels=out_channels, + kernel_size=3, + stride=1, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + def forward(self, x): + x = self.conv_first(x) + x_left = self.convs(x) + x_right = self.pool(x) + x = self.fuse_last(torch.cat([x_left, x_right], dim=1)) + return x + + +class GELayer(BaseModule): + """Gather-and-Expansion Layer. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + exp_ratio (int): Expansion ratio for middle channels. + Default: 6. 
+        stride (int): Stride of GELayer. Default: 1
+        conv_cfg (dict | None): Config of conv layers.
+            Default: None.
+        norm_cfg (dict | None): Config of norm layers.
+            Default: dict(type='BN').
+        act_cfg (dict): Config of activation layers.
+            Default: dict(type='ReLU').
+        init_cfg (dict or list[dict], optional): Initialization config dict.
+            Default: None.
+    Returns:
+        x (torch.Tensor): Intermediate feature map in
+            Semantic Branch.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 exp_ratio=6,
+                 stride=1,
+                 conv_cfg=None,
+                 norm_cfg=dict(type='BN'),
+                 act_cfg=dict(type='ReLU'),
+                 init_cfg=None):
+        super(GELayer, self).__init__(init_cfg=init_cfg)
+        mid_channel = in_channels * exp_ratio
+        self.conv1 = ConvModule(
+            in_channels=in_channels,
+            out_channels=in_channels,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            conv_cfg=conv_cfg,
+            norm_cfg=norm_cfg,
+            act_cfg=act_cfg)
+        if stride == 1:
+            self.dwconv = nn.Sequential(
+                # ReLU in ConvModule not shown in paper
+                ConvModule(
+                    in_channels=in_channels,
+                    out_channels=mid_channel,
+                    kernel_size=3,
+                    stride=stride,
+                    padding=1,
+                    groups=in_channels,
+                    conv_cfg=conv_cfg,
+                    norm_cfg=norm_cfg,
+                    act_cfg=act_cfg))
+            self.shortcut = None
+        else:
+            self.dwconv = nn.Sequential(
+                ConvModule(
+                    in_channels=in_channels,
+                    out_channels=mid_channel,
+                    kernel_size=3,
+                    stride=stride,
+                    padding=1,
+                    groups=in_channels,
+                    bias=False,
+                    conv_cfg=conv_cfg,
+                    norm_cfg=norm_cfg,
+                    act_cfg=None),
+                # ReLU in ConvModule not shown in paper
+                ConvModule(
+                    in_channels=mid_channel,
+                    out_channels=mid_channel,
+                    kernel_size=3,
+                    stride=1,
+                    padding=1,
+                    groups=mid_channel,
+                    conv_cfg=conv_cfg,
+                    norm_cfg=norm_cfg,
+                    act_cfg=act_cfg),
+            )
+            self.shortcut = nn.Sequential(
+                DepthwiseSeparableConvModule(
+                    in_channels=in_channels,
+                    out_channels=out_channels,
+                    kernel_size=3,
+                    stride=stride,
+                    padding=1,
+                    dw_norm_cfg=norm_cfg,
+                    dw_act_cfg=None,
+                    pw_norm_cfg=norm_cfg,
+                    pw_act_cfg=None,
+                ))
+
+        self.conv2 = nn.Sequential(
+            ConvModule(
+                in_channels=mid_channel,
+                out_channels=out_channels,
+                kernel_size=1,
+                stride=1,
+                padding=0,
+                bias=False,
+                conv_cfg=conv_cfg,
+                norm_cfg=norm_cfg,
+                act_cfg=None,
+            ))
+
+        self.act = build_activation_layer(act_cfg)
+
+    def forward(self, x):
+        identity = x
+        x = self.conv1(x)
+        x = self.dwconv(x)
+        x = self.conv2(x)
+        if self.shortcut is not None:
+            shortcut = self.shortcut(identity)
+            x = x + shortcut
+        else:
+            x = x + identity
+        x = self.act(x)
+        return x
+
+
+class CEBlock(BaseModule):
+    """Context Embedding Block for large receptive field in Semantic Branch.
+
+    Args:
+        in_channels (int): Number of input channels.
+            Default: 3.
+        out_channels (int): Number of output channels.
+            Default: 16.
+        conv_cfg (dict | None): Config of conv layers.
+            Default: None.
+        norm_cfg (dict | None): Config of norm layers.
+            Default: dict(type='BN').
+        act_cfg (dict): Config of activation layers.
+            Default: dict(type='ReLU').
+        init_cfg (dict or list[dict], optional): Initialization config dict.
+            Default: None.
+    Returns:
+        x (torch.Tensor): Last feature map in Semantic Branch.
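+    Example:
+        A minimal sketch (illustrative): the global-context branch is
+        added back onto the identity, so `in_channels` must equal
+        `out_channels`, e.g. CEBlock(128, 128) as built in SemanticBranch:
+        >>> import torch
+        >>> from mmseg.models.backbones.bisenetv2 import CEBlock
+        >>> self = CEBlock(in_channels=128, out_channels=128).eval()
+        >>> tuple(self(torch.rand(1, 128, 8, 8)).shape)
+        (1, 128, 8, 8)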
+ """ + + def __init__(self, + in_channels=3, + out_channels=16, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + init_cfg=None): + super(CEBlock, self).__init__(init_cfg=init_cfg) + self.in_channels = in_channels + self.out_channels = out_channels + self.gap = nn.Sequential( + nn.AdaptiveAvgPool2d((1, 1)), + build_norm_layer(norm_cfg, self.in_channels)[1]) + self.conv_gap = ConvModule( + in_channels=self.in_channels, + out_channels=self.out_channels, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + # Note: in paper here is naive conv2d, no bn-relu + self.conv_last = ConvModule( + in_channels=self.out_channels, + out_channels=self.out_channels, + kernel_size=3, + stride=1, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + def forward(self, x): + identity = x + x = self.gap(x) + x = self.conv_gap(x) + x = identity + x + x = self.conv_last(x) + return x + + +class SemanticBranch(BaseModule): + """Semantic Branch which is lightweight with narrow channels and deep + layers to obtain high-level semantic context. + + Args: + semantic_channels(Tuple[int]): Size of channel numbers of + various stages in Semantic Branch. + Default: (16, 32, 64, 128). + in_channels (int): Number of channels of input image. Default: 3. + exp_ratio (int): Expansion ratio for middle channels. + Default: 6. + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + Returns: + semantic_outs (List[torch.Tensor]): List of several feature maps + for auxiliary heads (Booster) and Bilateral + Guided Aggregation Layer. + """ + + def __init__(self, + semantic_channels=(16, 32, 64, 128), + in_channels=3, + exp_ratio=6, + init_cfg=None): + super(SemanticBranch, self).__init__(init_cfg=init_cfg) + self.in_channels = in_channels + self.semantic_channels = semantic_channels + self.semantic_stages = [] + for i in range(len(semantic_channels)): + stage_name = f'stage{i + 1}' + self.semantic_stages.append(stage_name) + if i == 0: + self.add_module( + stage_name, + StemBlock(self.in_channels, semantic_channels[i])) + elif i == (len(semantic_channels) - 1): + self.add_module( + stage_name, + nn.Sequential( + GELayer(semantic_channels[i - 1], semantic_channels[i], + exp_ratio, 2), + GELayer(semantic_channels[i], semantic_channels[i], + exp_ratio, 1), + GELayer(semantic_channels[i], semantic_channels[i], + exp_ratio, 1), + GELayer(semantic_channels[i], semantic_channels[i], + exp_ratio, 1))) + else: + self.add_module( + stage_name, + nn.Sequential( + GELayer(semantic_channels[i - 1], semantic_channels[i], + exp_ratio, 2), + GELayer(semantic_channels[i], semantic_channels[i], + exp_ratio, 1))) + + self.add_module(f'stage{len(semantic_channels)}_CEBlock', + CEBlock(semantic_channels[-1], semantic_channels[-1])) + self.semantic_stages.append(f'stage{len(semantic_channels)}_CEBlock') + + def forward(self, x): + semantic_outs = [] + for stage_name in self.semantic_stages: + semantic_stage = getattr(self, stage_name) + x = semantic_stage(x) + semantic_outs.append(x) + return semantic_outs + + +class BGALayer(BaseModule): + """Bilateral Guided Aggregation Layer to fuse the complementary information + from both Detail Branch and Semantic Branch. + + Args: + out_channels (int): Number of output channels. + Default: 128. + align_corners (bool): align_corners argument of F.interpolate. + Default: False. + conv_cfg (dict | None): Config of conv layers. + Default: None. 
+ norm_cfg (dict | None): Config of norm layers. + Default: dict(type='BN'). + act_cfg (dict): Config of activation layers. + Default: dict(type='ReLU'). + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + Returns: + output (torch.Tensor): Output feature map for Segment heads. + """ + + def __init__(self, + out_channels=128, + align_corners=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + init_cfg=None): + super(BGALayer, self).__init__(init_cfg=init_cfg) + self.out_channels = out_channels + self.align_corners = align_corners + self.detail_dwconv = nn.Sequential( + DepthwiseSeparableConvModule( + in_channels=self.out_channels, + out_channels=self.out_channels, + kernel_size=3, + stride=1, + padding=1, + dw_norm_cfg=norm_cfg, + dw_act_cfg=None, + pw_norm_cfg=None, + pw_act_cfg=None, + )) + self.detail_down = nn.Sequential( + ConvModule( + in_channels=self.out_channels, + out_channels=self.out_channels, + kernel_size=3, + stride=2, + padding=1, + bias=False, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None), + nn.AvgPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=False)) + self.semantic_conv = nn.Sequential( + ConvModule( + in_channels=self.out_channels, + out_channels=self.out_channels, + kernel_size=3, + stride=1, + padding=1, + bias=False, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None)) + self.semantic_dwconv = nn.Sequential( + DepthwiseSeparableConvModule( + in_channels=self.out_channels, + out_channels=self.out_channels, + kernel_size=3, + stride=1, + padding=1, + dw_norm_cfg=norm_cfg, + dw_act_cfg=None, + pw_norm_cfg=None, + pw_act_cfg=None, + )) + self.conv = ConvModule( + in_channels=self.out_channels, + out_channels=self.out_channels, + kernel_size=3, + stride=1, + padding=1, + inplace=True, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + ) + + def forward(self, x_d, x_s): + detail_dwconv = self.detail_dwconv(x_d) + detail_down = self.detail_down(x_d) + semantic_conv = self.semantic_conv(x_s) + semantic_dwconv = self.semantic_dwconv(x_s) + semantic_conv = resize( + input=semantic_conv, + size=detail_dwconv.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + fuse_1 = detail_dwconv * torch.sigmoid(semantic_conv) + fuse_2 = detail_down * torch.sigmoid(semantic_dwconv) + fuse_2 = resize( + input=fuse_2, + size=fuse_1.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + output = self.conv(fuse_1 + fuse_2) + return output + + +@BACKBONES.register_module() +class BiSeNetV2(BaseModule): + """BiSeNetV2: Bilateral Network with Guided Aggregation for + Real-time Semantic Segmentation. + + This backbone is the implementation of + `BiSeNetV2 `_. + + Args: + in_channels (int): Number of channel of input image. Default: 3. + detail_channels (Tuple[int], optional): Channels of each stage + in Detail Branch. Default: (64, 64, 128). + semantic_channels (Tuple[int], optional): Channels of each stage + in Semantic Branch. Default: (16, 32, 64, 128). + See Table 1 and Figure 3 of paper for more details. + semantic_expansion_ratio (int, optional): The expansion factor + expanding channel number of middle channels in Semantic Branch. + Default: 6. + bga_channels (int, optional): Number of middle channels in + Bilateral Guided Aggregation Layer. Default: 128. + out_indices (Tuple[int] | int, optional): Output from which stages. + Default: (0, 1, 2, 3, 4). 
+ align_corners (bool, optional): The align_corners argument of + resize operation in Bilateral Guided Aggregation Layer. + Default: False. + conv_cfg (dict | None): Config of conv layers. + Default: None. + norm_cfg (dict | None): Config of norm layers. + Default: dict(type='BN'). + act_cfg (dict): Config of activation layers. + Default: dict(type='ReLU'). + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + """ + + def __init__(self, + in_channels=3, + detail_channels=(64, 64, 128), + semantic_channels=(16, 32, 64, 128), + semantic_expansion_ratio=6, + bga_channels=128, + out_indices=(0, 1, 2, 3, 4), + align_corners=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + init_cfg=None): + if init_cfg is None: + init_cfg = [ + dict(type='Kaiming', layer='Conv2d'), + dict( + type='Constant', val=1, layer=['_BatchNorm', 'GroupNorm']) + ] + super(BiSeNetV2, self).__init__(init_cfg=init_cfg) + self.in_channels = in_channels + self.out_indices = out_indices + self.detail_channels = detail_channels + self.semantic_channels = semantic_channels + self.semantic_expansion_ratio = semantic_expansion_ratio + self.bga_channels = bga_channels + self.align_corners = align_corners + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + + self.detail = DetailBranch(self.detail_channels, self.in_channels) + self.semantic = SemanticBranch(self.semantic_channels, + self.in_channels, + self.semantic_expansion_ratio) + self.bga = BGALayer(self.bga_channels, self.align_corners) + + def forward(self, x): + # stole refactoring code from Coin Cheung, thanks + x_detail = self.detail(x) + x_semantic_lst = self.semantic(x) + x_head = self.bga(x_detail, x_semantic_lst[-1]) + outs = [x_head] + x_semantic_lst[:-1] + outs = [outs[i] for i in self.out_indices] + return tuple(outs) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/cgnet.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/cgnet.py new file mode 100644 index 0000000..168194c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/cgnet.py @@ -0,0 +1,372 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings + +import torch +import torch.nn as nn +import torch.utils.checkpoint as cp +from mmcv.cnn import ConvModule, build_conv_layer, build_norm_layer +from mmcv.runner import BaseModule +from mmcv.utils.parrots_wrapper import _BatchNorm + +from ..builder import BACKBONES + + +class GlobalContextExtractor(nn.Module): + """Global Context Extractor for CGNet. + + This class is employed to refine the joint feature of both local feature + and surrounding context. + + Args: + channel (int): Number of input feature channels. + reduction (int): Reductions for global context extractor. Default: 16. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. 
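+    Example:
+        A minimal sketch (illustrative; channel and spatial sizes are
+        arbitrary). The block rescales each channel by a learned global
+        weight, so the output shape matches the input:
+        >>> import torch
+        >>> from mmseg.models.backbones.cgnet import GlobalContextExtractor
+        >>> self = GlobalContextExtractor(channel=64, reduction=16)
+        >>> tuple(self(torch.rand(2, 64, 16, 16)).shape)
+        (2, 64, 16, 16)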
+ """ + + def __init__(self, channel, reduction=16, with_cp=False): + super(GlobalContextExtractor, self).__init__() + self.channel = channel + self.reduction = reduction + assert reduction >= 1 and channel >= reduction + self.with_cp = with_cp + self.avg_pool = nn.AdaptiveAvgPool2d(1) + self.fc = nn.Sequential( + nn.Linear(channel, channel // reduction), nn.ReLU(inplace=True), + nn.Linear(channel // reduction, channel), nn.Sigmoid()) + + def forward(self, x): + + def _inner_forward(x): + num_batch, num_channel = x.size()[:2] + y = self.avg_pool(x).view(num_batch, num_channel) + y = self.fc(y).view(num_batch, num_channel, 1, 1) + return x * y + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + return out + + +class ContextGuidedBlock(nn.Module): + """Context Guided Block for CGNet. + + This class consists of four components: local feature extractor, + surrounding feature extractor, joint feature extractor and global + context extractor. + + Args: + in_channels (int): Number of input feature channels. + out_channels (int): Number of output feature channels. + dilation (int): Dilation rate for surrounding context extractor. + Default: 2. + reduction (int): Reduction for global context extractor. Default: 16. + skip_connect (bool): Add input to output or not. Default: True. + downsample (bool): Downsample the input to 1/2 or not. Default: False. + conv_cfg (dict): Config dict for convolution layer. + Default: None, which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN', requires_grad=True). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='PReLU'). + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. 
+ """ + + def __init__(self, + in_channels, + out_channels, + dilation=2, + reduction=16, + skip_connect=True, + downsample=False, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + act_cfg=dict(type='PReLU'), + with_cp=False): + super(ContextGuidedBlock, self).__init__() + self.with_cp = with_cp + self.downsample = downsample + + channels = out_channels if downsample else out_channels // 2 + if 'type' in act_cfg and act_cfg['type'] == 'PReLU': + act_cfg['num_parameters'] = channels + kernel_size = 3 if downsample else 1 + stride = 2 if downsample else 1 + padding = (kernel_size - 1) // 2 + + self.conv1x1 = ConvModule( + in_channels, + channels, + kernel_size, + stride, + padding, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + self.f_loc = build_conv_layer( + conv_cfg, + channels, + channels, + kernel_size=3, + padding=1, + groups=channels, + bias=False) + self.f_sur = build_conv_layer( + conv_cfg, + channels, + channels, + kernel_size=3, + padding=dilation, + groups=channels, + dilation=dilation, + bias=False) + + self.bn = build_norm_layer(norm_cfg, 2 * channels)[1] + self.activate = nn.PReLU(2 * channels) + + if downsample: + self.bottleneck = build_conv_layer( + conv_cfg, + 2 * channels, + out_channels, + kernel_size=1, + bias=False) + + self.skip_connect = skip_connect and not downsample + self.f_glo = GlobalContextExtractor(out_channels, reduction, with_cp) + + def forward(self, x): + + def _inner_forward(x): + out = self.conv1x1(x) + loc = self.f_loc(out) + sur = self.f_sur(out) + + joi_feat = torch.cat([loc, sur], 1) # the joint feature + joi_feat = self.bn(joi_feat) + joi_feat = self.activate(joi_feat) + if self.downsample: + joi_feat = self.bottleneck(joi_feat) # channel = out_channels + # f_glo is employed to refine the joint feature + out = self.f_glo(joi_feat) + + if self.skip_connect: + return x + out + else: + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + return out + + +class InputInjection(nn.Module): + """Downsampling module for CGNet.""" + + def __init__(self, num_downsampling): + super(InputInjection, self).__init__() + self.pool = nn.ModuleList() + for i in range(num_downsampling): + self.pool.append(nn.AvgPool2d(3, stride=2, padding=1)) + + def forward(self, x): + for pool in self.pool: + x = pool(x) + return x + + +@BACKBONES.register_module() +class CGNet(BaseModule): + """CGNet backbone. + + This backbone is the implementation of `A Light-weight Context Guided + Network for Semantic Segmentation `_. + + Args: + in_channels (int): Number of input image channels. Normally 3. + num_channels (tuple[int]): Numbers of feature channels at each stages. + Default: (32, 64, 128). + num_blocks (tuple[int]): Numbers of CG blocks at stage 1 and stage 2. + Default: (3, 21). + dilations (tuple[int]): Dilation rate for surrounding context + extractors at stage 1 and stage 2. Default: (2, 4). + reductions (tuple[int]): Reductions for global context extractors at + stage 1 and stage 2. Default: (8, 16). + conv_cfg (dict): Config dict for convolution layer. + Default: None, which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN', requires_grad=True). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='PReLU'). + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. 
+        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+            memory while slowing down the training speed. Default: False.
+        pretrained (str, optional): model pretrained path. Default: None
+        init_cfg (dict or list[dict], optional): Initialization config dict.
+            Default: None
+    """
+
+    def __init__(self,
+                 in_channels=3,
+                 num_channels=(32, 64, 128),
+                 num_blocks=(3, 21),
+                 dilations=(2, 4),
+                 reductions=(8, 16),
+                 conv_cfg=None,
+                 norm_cfg=dict(type='BN', requires_grad=True),
+                 act_cfg=dict(type='PReLU'),
+                 norm_eval=False,
+                 with_cp=False,
+                 pretrained=None,
+                 init_cfg=None):
+
+        super(CGNet, self).__init__(init_cfg)
+
+        assert not (init_cfg and pretrained), \
+            'init_cfg and pretrained cannot be set at the same time'
+        if isinstance(pretrained, str):
+            warnings.warn('DeprecationWarning: pretrained is deprecated, '
+                          'please use "init_cfg" instead')
+            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)
+        elif pretrained is None:
+            if init_cfg is None:
+                self.init_cfg = [
+                    dict(type='Kaiming', layer=['Conv2d', 'Linear']),
+                    dict(
+                        type='Constant',
+                        val=1,
+                        layer=['_BatchNorm', 'GroupNorm']),
+                    dict(type='Constant', val=0, layer='PReLU')
+                ]
+        else:
+            raise TypeError('pretrained must be a str or None')
+
+        self.in_channels = in_channels
+        self.num_channels = num_channels
+        assert isinstance(self.num_channels, tuple) and len(
+            self.num_channels) == 3
+        self.num_blocks = num_blocks
+        assert isinstance(self.num_blocks, tuple) and len(self.num_blocks) == 2
+        self.dilations = dilations
+        assert isinstance(self.dilations, tuple) and len(self.dilations) == 2
+        self.reductions = reductions
+        assert isinstance(self.reductions, tuple) and len(self.reductions) == 2
+        self.conv_cfg = conv_cfg
+        self.norm_cfg = norm_cfg
+        self.act_cfg = act_cfg
+        if 'type' in self.act_cfg and self.act_cfg['type'] == 'PReLU':
+            self.act_cfg['num_parameters'] = num_channels[0]
+        self.norm_eval = norm_eval
+        self.with_cp = with_cp
+
+        cur_channels = in_channels
+        self.stem = nn.ModuleList()
+        for i in range(3):
+            self.stem.append(
+                ConvModule(
+                    cur_channels,
+                    num_channels[0],
+                    3,
+                    2 if i == 0 else 1,
+                    padding=1,
+                    conv_cfg=conv_cfg,
+                    norm_cfg=norm_cfg,
+                    act_cfg=act_cfg))
+            cur_channels = num_channels[0]
+
+        self.inject_2x = InputInjection(1)  # down-sample for Input, factor=2
+        self.inject_4x = InputInjection(2)  # down-sample for Input, factor=4
+
+        cur_channels += in_channels
+        self.norm_prelu_0 = nn.Sequential(
+            build_norm_layer(norm_cfg, cur_channels)[1],
+            nn.PReLU(cur_channels))
+
+        # stage 1
+        self.level1 = nn.ModuleList()
+        for i in range(num_blocks[0]):
+            self.level1.append(
+                ContextGuidedBlock(
+                    cur_channels if i == 0 else num_channels[1],
+                    num_channels[1],
+                    dilations[0],
+                    reductions[0],
+                    downsample=(i == 0),
+                    conv_cfg=conv_cfg,
+                    norm_cfg=norm_cfg,
+                    act_cfg=act_cfg,
+                    with_cp=with_cp))  # CG block
+
+        cur_channels = 2 * num_channels[1] + in_channels
+        self.norm_prelu_1 = nn.Sequential(
+            build_norm_layer(norm_cfg, cur_channels)[1],
+            nn.PReLU(cur_channels))
+
+        # stage 2
+        self.level2 = nn.ModuleList()
+        for i in range(num_blocks[1]):
+            self.level2.append(
+                ContextGuidedBlock(
+                    cur_channels if i == 0 else num_channels[2],
+                    num_channels[2],
+                    dilations[1],
+                    reductions[1],
+                    downsample=(i == 0),
+                    conv_cfg=conv_cfg,
+                    norm_cfg=norm_cfg,
+                    act_cfg=act_cfg,
+                    with_cp=with_cp))  # CG block
+
+        cur_channels = 2 * num_channels[2]
+        self.norm_prelu_2 = nn.Sequential(
+            build_norm_layer(norm_cfg, cur_channels)[1],
+            nn.PReLU(cur_channels))
+
+    def forward(self, x):
+        output = []
+
+        # stage 0
+        inp_2x = self.inject_2x(x)
+        inp_4x = self.inject_4x(x)
+        for layer in self.stem:
+            x = layer(x)
+        x = self.norm_prelu_0(torch.cat([x, inp_2x], 1))
+        output.append(x)
+
+        # stage 1
+        for i, layer in enumerate(self.level1):
+            x = layer(x)
+            if i == 0:
+                down1 = x
+        x = self.norm_prelu_1(torch.cat([x, down1, inp_4x], 1))
+        output.append(x)
+
+        # stage 2
+        for i, layer in enumerate(self.level2):
+            x = layer(x)
+            if i == 0:
+                down2 = x
+        x = self.norm_prelu_2(torch.cat([down2, x], 1))
+        output.append(x)
+
+        return output
+
+    def train(self, mode=True):
+        """Convert the model into training mode while keeping the
+        normalization layers frozen."""
+        super(CGNet, self).train(mode)
+        if mode and self.norm_eval:
+            for m in self.modules():
+                # trick: eval have effect on BatchNorm only
+                if isinstance(m, _BatchNorm):
+                    m.eval()
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/erfnet.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/erfnet.py
new file mode 100644
index 0000000..8921c18
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/erfnet.py
@@ -0,0 +1,329 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+from mmcv.cnn import build_activation_layer, build_conv_layer, build_norm_layer
+from mmcv.runner import BaseModule
+
+from mmseg.ops import resize
+from ..builder import BACKBONES
+
+
+class DownsamplerBlock(BaseModule):
+    """Downsampler block of ERFNet.
+
+    This module is a little different from a basic ConvModule.
+    The features from the Conv and MaxPool layers are
+    concatenated before BatchNorm.
+
+    Args:
+        in_channels (int): Number of input channels.
+        out_channels (int): Number of output channels.
+        conv_cfg (dict | None): Config of conv layers.
+            Default: None.
+        norm_cfg (dict | None): Config of norm layers.
+            Default: dict(type='BN').
+        act_cfg (dict): Config of activation layers.
+            Default: dict(type='ReLU').
+        init_cfg (dict or list[dict], optional): Initialization config dict.
+            Default: None.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 conv_cfg=None,
+                 norm_cfg=dict(type='BN', eps=1e-3),
+                 act_cfg=dict(type='ReLU'),
+                 init_cfg=None):
+        super(DownsamplerBlock, self).__init__(init_cfg=init_cfg)
+        self.conv_cfg = conv_cfg
+        self.norm_cfg = norm_cfg
+        self.act_cfg = act_cfg
+
+        self.conv = build_conv_layer(
+            self.conv_cfg,
+            in_channels,
+            out_channels - in_channels,
+            kernel_size=3,
+            stride=2,
+            padding=1)
+        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
+        self.bn = build_norm_layer(self.norm_cfg, out_channels)[1]
+        self.act = build_activation_layer(self.act_cfg)
+
+    def forward(self, input):
+        conv_out = self.conv(input)
+        pool_out = self.pool(input)
+        pool_out = resize(
+            input=pool_out,
+            size=conv_out.size()[2:],
+            mode='bilinear',
+            align_corners=False)
+        output = torch.cat([conv_out, pool_out], 1)
+        output = self.bn(output)
+        output = self.act(output)
+        return output
+
+
+class NonBottleneck1d(BaseModule):
+    """Non-bottleneck block of ERFNet.
+
+    Args:
+        channels (int): Number of channels in Non-bottleneck block.
+        drop_rate (float): Probability of an element to be zeroed.
+            Default 0.
+        dilation (int): Dilation rate for last two conv layers.
+            Default 1.
+        num_conv_layer (int): Number of 3x1 and 1x3 convolution layers.
+            Default 2.
+        conv_cfg (dict | None): Config of conv layers.
+            Default: None.
+        norm_cfg (dict | None): Config of norm layers.
+            Default: dict(type='BN').
+ act_cfg (dict): Config of activation layers. + Default: dict(type='ReLU'). + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + """ + + def __init__(self, + channels, + drop_rate=0, + dilation=1, + num_conv_layer=2, + conv_cfg=None, + norm_cfg=dict(type='BN', eps=1e-3), + act_cfg=dict(type='ReLU'), + init_cfg=None): + super(NonBottleneck1d, self).__init__(init_cfg=init_cfg) + + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.act = build_activation_layer(self.act_cfg) + + self.convs_layers = nn.ModuleList() + for conv_layer in range(num_conv_layer): + first_conv_padding = (1, 0) if conv_layer == 0 else (dilation, 0) + first_conv_dilation = 1 if conv_layer == 0 else (dilation, 1) + second_conv_padding = (0, 1) if conv_layer == 0 else (0, dilation) + second_conv_dilation = 1 if conv_layer == 0 else (1, dilation) + + self.convs_layers.append( + build_conv_layer( + self.conv_cfg, + channels, + channels, + kernel_size=(3, 1), + stride=1, + padding=first_conv_padding, + bias=True, + dilation=first_conv_dilation)) + self.convs_layers.append(self.act) + self.convs_layers.append( + build_conv_layer( + self.conv_cfg, + channels, + channels, + kernel_size=(1, 3), + stride=1, + padding=second_conv_padding, + bias=True, + dilation=second_conv_dilation)) + self.convs_layers.append( + build_norm_layer(self.norm_cfg, channels)[1]) + if conv_layer == 0: + self.convs_layers.append(self.act) + else: + self.convs_layers.append(nn.Dropout(p=drop_rate)) + + def forward(self, input): + output = input + for conv in self.convs_layers: + output = conv(output) + output = self.act(output + input) + return output + + +class UpsamplerBlock(BaseModule): + """Upsampler block of ERFNet. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + conv_cfg (dict | None): Config of conv layers. + Default: None. + norm_cfg (dict | None): Config of norm layers. + Default: dict(type='BN'). + act_cfg (dict): Config of activation layers. + Default: dict(type='ReLU'). + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + """ + + def __init__(self, + in_channels, + out_channels, + conv_cfg=None, + norm_cfg=dict(type='BN', eps=1e-3), + act_cfg=dict(type='ReLU'), + init_cfg=None): + super(UpsamplerBlock, self).__init__(init_cfg=init_cfg) + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + + self.conv = nn.ConvTranspose2d( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + stride=2, + padding=1, + output_padding=1, + bias=True) + self.bn = build_norm_layer(self.norm_cfg, out_channels)[1] + self.act = build_activation_layer(self.act_cfg) + + def forward(self, input): + output = self.conv(input) + output = self.bn(output) + output = self.act(output) + return output + + +@BACKBONES.register_module() +class ERFNet(BaseModule): + """ERFNet backbone. + + This backbone is the implementation of `ERFNet: Efficient Residual + Factorized ConvNet for Real-time SemanticSegmentation + `_. + + Args: + in_channels (int): The number of channels of input + image. Default: 3. + enc_downsample_channels (Tuple[int]): Size of channel + numbers of various Downsampler block in encoder. + Default: (16, 64, 128). + enc_stage_non_bottlenecks (Tuple[int]): Number of stages of + Non-bottleneck block in encoder. + Default: (5, 8). + enc_non_bottleneck_dilations (Tuple[int]): Dilation rate of each + stage of Non-bottleneck block of encoder. 
+ Default: (2, 4, 8, 16). + enc_non_bottleneck_channels (Tuple[int]): Size of channel + numbers of various Non-bottleneck block in encoder. + Default: (64, 128). + dec_upsample_channels (Tuple[int]): Size of channel numbers of + various Deconvolution block in decoder. + Default: (64, 16). + dec_stages_non_bottleneck (Tuple[int]): Number of stages of + Non-bottleneck block in decoder. + Default: (2, 2). + dec_non_bottleneck_channels (Tuple[int]): Size of channel + numbers of various Non-bottleneck block in decoder. + Default: (64, 16). + drop_rate (float): Probability of an element to be zeroed. + Default 0.1. + """ + + def __init__(self, + in_channels=3, + enc_downsample_channels=(16, 64, 128), + enc_stage_non_bottlenecks=(5, 8), + enc_non_bottleneck_dilations=(2, 4, 8, 16), + enc_non_bottleneck_channels=(64, 128), + dec_upsample_channels=(64, 16), + dec_stages_non_bottleneck=(2, 2), + dec_non_bottleneck_channels=(64, 16), + dropout_ratio=0.1, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + act_cfg=dict(type='ReLU'), + init_cfg=None): + + super(ERFNet, self).__init__(init_cfg=init_cfg) + assert len(enc_downsample_channels) \ + == len(dec_upsample_channels)+1, 'Number of downsample\ + block of encoder does not \ + match number of upsample block of decoder!' + assert len(enc_downsample_channels) \ + == len(enc_stage_non_bottlenecks)+1, 'Number of \ + downsample block of encoder does not match \ + number of Non-bottleneck block of encoder!' + assert len(enc_downsample_channels) \ + == len(enc_non_bottleneck_channels)+1, 'Number of \ + downsample block of encoder does not match \ + number of channels of Non-bottleneck block of encoder!' + assert enc_stage_non_bottlenecks[-1] \ + % len(enc_non_bottleneck_dilations) == 0, 'Number of \ + Non-bottleneck block of encoder does not match \ + number of Non-bottleneck block of encoder!' + assert len(dec_upsample_channels) \ + == len(dec_stages_non_bottleneck), 'Number of \ + upsample block of decoder does not match \ + number of Non-bottleneck block of decoder!' + assert len(dec_stages_non_bottleneck) \ + == len(dec_non_bottleneck_channels), 'Number of \ + Non-bottleneck block of decoder does not match \ + number of channels of Non-bottleneck block of decoder!' + + self.in_channels = in_channels + self.enc_downsample_channels = enc_downsample_channels + self.enc_stage_non_bottlenecks = enc_stage_non_bottlenecks + self.enc_non_bottleneck_dilations = enc_non_bottleneck_dilations + self.enc_non_bottleneck_channels = enc_non_bottleneck_channels + self.dec_upsample_channels = dec_upsample_channels + self.dec_stages_non_bottleneck = dec_stages_non_bottleneck + self.dec_non_bottleneck_channels = dec_non_bottleneck_channels + self.dropout_ratio = dropout_ratio + + self.encoder = nn.ModuleList() + self.decoder = nn.ModuleList() + + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + + self.encoder.append( + DownsamplerBlock(self.in_channels, enc_downsample_channels[0])) + + for i in range(len(enc_downsample_channels) - 1): + self.encoder.append( + DownsamplerBlock(enc_downsample_channels[i], + enc_downsample_channels[i + 1])) + # Last part of encoder is some dilated NonBottleneck1d blocks. 
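+            # With the defaults, the dilation tuple (2, 4, 8, 16) is
+            # cycled 8 / 4 = 2 times, appending 8 dilated blocks; this is
+            # why enc_stage_non_bottlenecks[-1] must be divisible by
+            # len(enc_non_bottleneck_dilations) (asserted above).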
+ if i == len(enc_downsample_channels) - 2: + iteration_times = int(enc_stage_non_bottlenecks[-1] / + len(enc_non_bottleneck_dilations)) + for j in range(iteration_times): + for k in range(len(enc_non_bottleneck_dilations)): + self.encoder.append( + NonBottleneck1d(enc_downsample_channels[-1], + self.dropout_ratio, + enc_non_bottleneck_dilations[k])) + else: + for j in range(enc_stage_non_bottlenecks[i]): + self.encoder.append( + NonBottleneck1d(enc_downsample_channels[i + 1], + self.dropout_ratio)) + + for i in range(len(dec_upsample_channels)): + if i == 0: + self.decoder.append( + UpsamplerBlock(enc_downsample_channels[-1], + dec_non_bottleneck_channels[i])) + else: + self.decoder.append( + UpsamplerBlock(dec_non_bottleneck_channels[i - 1], + dec_non_bottleneck_channels[i])) + for j in range(dec_stages_non_bottleneck[i]): + self.decoder.append( + NonBottleneck1d(dec_non_bottleneck_channels[i])) + + def forward(self, x): + for enc in self.encoder: + x = enc(x) + for dec in self.decoder: + x = dec(x) + return [x] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/fast_scnn.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/fast_scnn.py new file mode 100644 index 0000000..cbfbcaf --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/fast_scnn.py @@ -0,0 +1,409 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule, DepthwiseSeparableConvModule +from mmcv.runner import BaseModule + +from mmseg.models.decode_heads.psp_head import PPM +from mmseg.ops import resize +from ..builder import BACKBONES +from ..utils import InvertedResidual + + +class LearningToDownsample(nn.Module): + """Learning to downsample module. + + Args: + in_channels (int): Number of input channels. + dw_channels (tuple[int]): Number of output channels of the first and + the second depthwise conv (dwconv) layers. + out_channels (int): Number of output channels of the whole + 'learning to downsample' module. + conv_cfg (dict | None): Config of conv layers. Default: None + norm_cfg (dict | None): Config of norm layers. Default: + dict(type='BN') + act_cfg (dict): Config of activation layers. Default: + dict(type='ReLU') + dw_act_cfg (dict): In DepthwiseSeparableConvModule, activation config + of depthwise ConvModule. If it is 'default', it will be the same + as `act_cfg`. Default: None. + """ + + def __init__(self, + in_channels, + dw_channels, + out_channels, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + dw_act_cfg=None): + super(LearningToDownsample, self).__init__() + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.dw_act_cfg = dw_act_cfg + dw_channels1 = dw_channels[0] + dw_channels2 = dw_channels[1] + + self.conv = ConvModule( + in_channels, + dw_channels1, + 3, + stride=2, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + self.dsconv1 = DepthwiseSeparableConvModule( + dw_channels1, + dw_channels2, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + dw_act_cfg=self.dw_act_cfg) + + self.dsconv2 = DepthwiseSeparableConvModule( + dw_channels2, + out_channels, + kernel_size=3, + stride=2, + padding=1, + norm_cfg=self.norm_cfg, + dw_act_cfg=self.dw_act_cfg) + + def forward(self, x): + x = self.conv(x) + x = self.dsconv1(x) + x = self.dsconv2(x) + return x + + +class GlobalFeatureExtractor(nn.Module): + """Global feature extractor module. 
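+
+    Stacks three groups of MobileNet-v2 style inverted-residual
+    bottlenecks and a Pyramid Pooling Module (PPM) to aggregate global
+    context at low resolution, following the Fast-SCNN design.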
+ + Args: + in_channels (int): Number of input channels of the GFE module. + Default: 64 + block_channels (tuple[int]): Tuple of ints. Each int specifies the + number of output channels of each Inverted Residual module. + Default: (64, 96, 128) + out_channels(int): Number of output channels of the GFE module. + Default: 128 + expand_ratio (int): Adjusts number of channels of the hidden layer + in InvertedResidual by this amount. + Default: 6 + num_blocks (tuple[int]): Tuple of ints. Each int specifies the + number of times each Inverted Residual module is repeated. + The repeated Inverted Residual modules are called a 'group'. + Default: (3, 3, 3) + strides (tuple[int]): Tuple of ints. Each int specifies + the downsampling factor of each 'group'. + Default: (2, 2, 1) + pool_scales (tuple[int]): Tuple of ints. Each int specifies + the parameter required in 'global average pooling' within PPM. + Default: (1, 2, 3, 6) + conv_cfg (dict | None): Config of conv layers. Default: None + norm_cfg (dict | None): Config of norm layers. Default: + dict(type='BN') + act_cfg (dict): Config of activation layers. Default: + dict(type='ReLU') + align_corners (bool): align_corners argument of F.interpolate. + Default: False + """ + + def __init__(self, + in_channels=64, + block_channels=(64, 96, 128), + out_channels=128, + expand_ratio=6, + num_blocks=(3, 3, 3), + strides=(2, 2, 1), + pool_scales=(1, 2, 3, 6), + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + align_corners=False): + super(GlobalFeatureExtractor, self).__init__() + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + assert len(block_channels) == len(num_blocks) == 3 + self.bottleneck1 = self._make_layer(in_channels, block_channels[0], + num_blocks[0], strides[0], + expand_ratio) + self.bottleneck2 = self._make_layer(block_channels[0], + block_channels[1], num_blocks[1], + strides[1], expand_ratio) + self.bottleneck3 = self._make_layer(block_channels[1], + block_channels[2], num_blocks[2], + strides[2], expand_ratio) + self.ppm = PPM( + pool_scales, + block_channels[2], + block_channels[2] // 4, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=align_corners) + + self.out = ConvModule( + block_channels[2] * 2, + out_channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def _make_layer(self, + in_channels, + out_channels, + blocks, + stride=1, + expand_ratio=6): + layers = [ + InvertedResidual( + in_channels, + out_channels, + stride, + expand_ratio, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + ] + for i in range(1, blocks): + layers.append( + InvertedResidual( + out_channels, + out_channels, + 1, + expand_ratio, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + return nn.Sequential(*layers) + + def forward(self, x): + x = self.bottleneck1(x) + x = self.bottleneck2(x) + x = self.bottleneck3(x) + x = torch.cat([x, *self.ppm(x)], dim=1) + x = self.out(x) + return x + + +class FeatureFusionModule(nn.Module): + """Feature fusion module. + + Args: + higher_in_channels (int): Number of input channels of the + higher-resolution branch. + lower_in_channels (int): Number of input channels of the + lower-resolution branch. + out_channels (int): Number of output channels. + conv_cfg (dict | None): Config of conv layers. Default: None + norm_cfg (dict | None): Config of norm layers. Default: + dict(type='BN') + dwconv_act_cfg (dict): Config of activation layers in 3x3 conv. 
+ Default: dict(type='ReLU'). + conv_act_cfg (dict): Config of activation layers in the two 1x1 conv. + Default: None. + align_corners (bool): align_corners argument of F.interpolate. + Default: False. + """ + + def __init__(self, + higher_in_channels, + lower_in_channels, + out_channels, + conv_cfg=None, + norm_cfg=dict(type='BN'), + dwconv_act_cfg=dict(type='ReLU'), + conv_act_cfg=None, + align_corners=False): + super(FeatureFusionModule, self).__init__() + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.dwconv_act_cfg = dwconv_act_cfg + self.conv_act_cfg = conv_act_cfg + self.align_corners = align_corners + self.dwconv = ConvModule( + lower_in_channels, + out_channels, + 3, + padding=1, + groups=out_channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.dwconv_act_cfg) + self.conv_lower_res = ConvModule( + out_channels, + out_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.conv_act_cfg) + + self.conv_higher_res = ConvModule( + higher_in_channels, + out_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.conv_act_cfg) + + self.relu = nn.ReLU(True) + + def forward(self, higher_res_feature, lower_res_feature): + lower_res_feature = resize( + lower_res_feature, + size=higher_res_feature.size()[2:], + mode='bilinear', + align_corners=self.align_corners) + lower_res_feature = self.dwconv(lower_res_feature) + lower_res_feature = self.conv_lower_res(lower_res_feature) + + higher_res_feature = self.conv_higher_res(higher_res_feature) + out = higher_res_feature + lower_res_feature + return self.relu(out) + + +@BACKBONES.register_module() +class FastSCNN(BaseModule): + """Fast-SCNN Backbone. + + This backbone is the implementation of `Fast-SCNN: Fast Semantic + Segmentation Network `_. + + Args: + in_channels (int): Number of input image channels. Default: 3. + downsample_dw_channels (tuple[int]): Number of output channels after + the first conv layer & the second conv layer in + Learning-To-Downsample (LTD) module. + Default: (32, 48). + global_in_channels (int): Number of input channels of + Global Feature Extractor(GFE). + Equal to number of output channels of LTD. + Default: 64. + global_block_channels (tuple[int]): Tuple of integers that describe + the output channels for each of the MobileNet-v2 bottleneck + residual blocks in GFE. + Default: (64, 96, 128). + global_block_strides (tuple[int]): Tuple of integers + that describe the strides (downsampling factors) for each of the + MobileNet-v2 bottleneck residual blocks in GFE. + Default: (2, 2, 1). + global_out_channels (int): Number of output channels of GFE. + Default: 128. + higher_in_channels (int): Number of input channels of the higher + resolution branch in FFM. + Equal to global_in_channels. + Default: 64. + lower_in_channels (int): Number of input channels of the lower + resolution branch in FFM. + Equal to global_out_channels. + Default: 128. + fusion_out_channels (int): Number of output channels of FFM. + Default: 128. + out_indices (tuple): Tuple of indices of list + [higher_res_features, lower_res_features, fusion_output]. + Often set to (0,1,2) to enable aux. heads. + Default: (0, 1, 2). + conv_cfg (dict | None): Config of conv layers. Default: None + norm_cfg (dict | None): Config of norm layers. Default: + dict(type='BN') + act_cfg (dict): Config of activation layers. Default: + dict(type='ReLU') + align_corners (bool): align_corners argument of F.interpolate. 
+ Default: False + dw_act_cfg (dict): In DepthwiseSeparableConvModule, activation config + of depthwise ConvModule. If it is 'default', it will be the same + as `act_cfg`. Default: None. + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None + """ + + def __init__(self, + in_channels=3, + downsample_dw_channels=(32, 48), + global_in_channels=64, + global_block_channels=(64, 96, 128), + global_block_strides=(2, 2, 1), + global_out_channels=128, + higher_in_channels=64, + lower_in_channels=128, + fusion_out_channels=128, + out_indices=(0, 1, 2), + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + align_corners=False, + dw_act_cfg=None, + init_cfg=None): + + super(FastSCNN, self).__init__(init_cfg) + + if init_cfg is None: + self.init_cfg = [ + dict(type='Kaiming', layer='Conv2d'), + dict( + type='Constant', val=1, layer=['_BatchNorm', 'GroupNorm']) + ] + + if global_in_channels != higher_in_channels: + raise AssertionError('Global Input Channels must be the same \ + with Higher Input Channels!') + elif global_out_channels != lower_in_channels: + raise AssertionError('Global Output Channels must be the same \ + with Lower Input Channels!') + + self.in_channels = in_channels + self.downsample_dw_channels1 = downsample_dw_channels[0] + self.downsample_dw_channels2 = downsample_dw_channels[1] + self.global_in_channels = global_in_channels + self.global_block_channels = global_block_channels + self.global_block_strides = global_block_strides + self.global_out_channels = global_out_channels + self.higher_in_channels = higher_in_channels + self.lower_in_channels = lower_in_channels + self.fusion_out_channels = fusion_out_channels + self.out_indices = out_indices + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.align_corners = align_corners + self.learning_to_downsample = LearningToDownsample( + in_channels, + downsample_dw_channels, + global_in_channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + dw_act_cfg=dw_act_cfg) + self.global_feature_extractor = GlobalFeatureExtractor( + global_in_channels, + global_block_channels, + global_out_channels, + strides=self.global_block_strides, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=self.align_corners) + self.feature_fusion = FeatureFusionModule( + higher_in_channels, + lower_in_channels, + fusion_out_channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + dwconv_act_cfg=self.act_cfg, + align_corners=self.align_corners) + + def forward(self, x): + higher_res_features = self.learning_to_downsample(x) + lower_res_features = self.global_feature_extractor(higher_res_features) + fusion_output = self.feature_fusion(higher_res_features, + lower_res_features) + + outs = [higher_res_features, lower_res_features, fusion_output] + outs = [outs[i] for i in self.out_indices] + return tuple(outs) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/hrnet.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/hrnet.py new file mode 100644 index 0000000..90feadc --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/hrnet.py @@ -0,0 +1,642 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
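+# HRNet keeps a high-resolution stream throughout the network and
+# repeatedly exchanges information with parallel lower-resolution
+# streams (see HRModule below), instead of recovering resolution from a
+# single low-resolution bottleneck.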
+import warnings + +import torch.nn as nn +from mmcv.cnn import build_conv_layer, build_norm_layer +from mmcv.runner import BaseModule, ModuleList, Sequential +from mmcv.utils.parrots_wrapper import _BatchNorm + +from mmseg.ops import Upsample, resize +from ..builder import BACKBONES +from .resnet import BasicBlock, Bottleneck + + +class HRModule(BaseModule): + """High-Resolution Module for HRNet. + + In this module, every branch has 4 BasicBlocks/Bottlenecks. Fusion/Exchange + is in this module. + """ + + def __init__(self, + num_branches, + blocks, + num_blocks, + in_channels, + num_channels, + multiscale_output=True, + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + block_init_cfg=None, + init_cfg=None): + super(HRModule, self).__init__(init_cfg) + self.block_init_cfg = block_init_cfg + self._check_branches(num_branches, num_blocks, in_channels, + num_channels) + + self.in_channels = in_channels + self.num_branches = num_branches + + self.multiscale_output = multiscale_output + self.norm_cfg = norm_cfg + self.conv_cfg = conv_cfg + self.with_cp = with_cp + self.branches = self._make_branches(num_branches, blocks, num_blocks, + num_channels) + self.fuse_layers = self._make_fuse_layers() + self.relu = nn.ReLU(inplace=False) + + def _check_branches(self, num_branches, num_blocks, in_channels, + num_channels): + """Check branches configuration.""" + if num_branches != len(num_blocks): + error_msg = f'NUM_BRANCHES({num_branches}) <> NUM_BLOCKS(' \ + f'{len(num_blocks)})' + raise ValueError(error_msg) + + if num_branches != len(num_channels): + error_msg = f'NUM_BRANCHES({num_branches}) <> NUM_CHANNELS(' \ + f'{len(num_channels)})' + raise ValueError(error_msg) + + if num_branches != len(in_channels): + error_msg = f'NUM_BRANCHES({num_branches}) <> NUM_INCHANNELS(' \ + f'{len(in_channels)})' + raise ValueError(error_msg) + + def _make_one_branch(self, + branch_index, + block, + num_blocks, + num_channels, + stride=1): + """Build one branch.""" + downsample = None + if stride != 1 or \ + self.in_channels[branch_index] != \ + num_channels[branch_index] * block.expansion: + downsample = nn.Sequential( + build_conv_layer( + self.conv_cfg, + self.in_channels[branch_index], + num_channels[branch_index] * block.expansion, + kernel_size=1, + stride=stride, + bias=False), + build_norm_layer(self.norm_cfg, num_channels[branch_index] * + block.expansion)[1]) + + layers = [] + layers.append( + block( + self.in_channels[branch_index], + num_channels[branch_index], + stride, + downsample=downsample, + with_cp=self.with_cp, + norm_cfg=self.norm_cfg, + conv_cfg=self.conv_cfg, + init_cfg=self.block_init_cfg)) + self.in_channels[branch_index] = \ + num_channels[branch_index] * block.expansion + for i in range(1, num_blocks[branch_index]): + layers.append( + block( + self.in_channels[branch_index], + num_channels[branch_index], + with_cp=self.with_cp, + norm_cfg=self.norm_cfg, + conv_cfg=self.conv_cfg, + init_cfg=self.block_init_cfg)) + + return Sequential(*layers) + + def _make_branches(self, num_branches, block, num_blocks, num_channels): + """Build multiple branch.""" + branches = [] + + for i in range(num_branches): + branches.append( + self._make_one_branch(i, block, num_blocks, num_channels)) + + return ModuleList(branches) + + def _make_fuse_layers(self): + """Build fuse layer.""" + if self.num_branches == 1: + return None + + num_branches = self.num_branches + in_channels = self.in_channels + fuse_layers = [] + num_out_branches = num_branches if self.multiscale_output 
else 1 + for i in range(num_out_branches): + fuse_layer = [] + for j in range(num_branches): + if j > i: + fuse_layer.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels[j], + in_channels[i], + kernel_size=1, + stride=1, + padding=0, + bias=False), + build_norm_layer(self.norm_cfg, in_channels[i])[1], + # we set align_corners=False for HRNet + Upsample( + scale_factor=2**(j - i), + mode='bilinear', + align_corners=False))) + elif j == i: + fuse_layer.append(None) + else: + conv_downsamples = [] + for k in range(i - j): + if k == i - j - 1: + conv_downsamples.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels[j], + in_channels[i], + kernel_size=3, + stride=2, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, + in_channels[i])[1])) + else: + conv_downsamples.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels[j], + in_channels[j], + kernel_size=3, + stride=2, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, + in_channels[j])[1], + nn.ReLU(inplace=False))) + fuse_layer.append(nn.Sequential(*conv_downsamples)) + fuse_layers.append(nn.ModuleList(fuse_layer)) + + return nn.ModuleList(fuse_layers) + + def forward(self, x): + """Forward function.""" + if self.num_branches == 1: + return [self.branches[0](x[0])] + + for i in range(self.num_branches): + x[i] = self.branches[i](x[i]) + + x_fuse = [] + for i in range(len(self.fuse_layers)): + y = 0 + for j in range(self.num_branches): + if i == j: + y += x[j] + elif j > i: + y = y + resize( + self.fuse_layers[i][j](x[j]), + size=x[i].shape[2:], + mode='bilinear', + align_corners=False) + else: + y += self.fuse_layers[i][j](x[j]) + x_fuse.append(self.relu(y)) + return x_fuse + + +@BACKBONES.register_module() +class HRNet(BaseModule): + """HRNet backbone. + + This backbone is the implementation of `High-Resolution Representations + for Labeling Pixels and Regions `_. + + Args: + extra (dict): Detailed configuration for each stage of HRNet. + There must be 4 stages, the configuration for each stage must have + 5 keys: + + - num_modules (int): The number of HRModule in this stage. + - num_branches (int): The number of branches in the HRModule. + - block (str): The type of convolution block. + - num_blocks (tuple): The number of blocks in each branch. + The length must be equal to num_branches. + - num_channels (tuple): The number of channels in each branch. + The length must be equal to num_branches. + in_channels (int): Number of input image channels. Normally 3. + conv_cfg (dict): Dictionary to construct and config conv layer. + Default: None. + norm_cfg (dict): Dictionary to construct and config norm layer. + Use `BN` by default. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + frozen_stages (int): Stages to be frozen (stop grad and set eval mode). + -1 means not freezing any parameters. Default: -1. + zero_init_residual (bool): Whether to use zero init for last norm layer + in resblocks to let them behave as identity. Default: False. + multiscale_output (bool): Whether to output multi-level features + produced by multiple branches. If False, only the first level + feature will be output. Default: True. + pretrained (str, optional): Model pretrained path. Default: None. 
+ init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + + Example: + >>> from mmseg.models import HRNet + >>> import torch + >>> extra = dict( + >>> stage1=dict( + >>> num_modules=1, + >>> num_branches=1, + >>> block='BOTTLENECK', + >>> num_blocks=(4, ), + >>> num_channels=(64, )), + >>> stage2=dict( + >>> num_modules=1, + >>> num_branches=2, + >>> block='BASIC', + >>> num_blocks=(4, 4), + >>> num_channels=(32, 64)), + >>> stage3=dict( + >>> num_modules=4, + >>> num_branches=3, + >>> block='BASIC', + >>> num_blocks=(4, 4, 4), + >>> num_channels=(32, 64, 128)), + >>> stage4=dict( + >>> num_modules=3, + >>> num_branches=4, + >>> block='BASIC', + >>> num_blocks=(4, 4, 4, 4), + >>> num_channels=(32, 64, 128, 256))) + >>> self = HRNet(extra, in_channels=1) + >>> self.eval() + >>> inputs = torch.rand(1, 1, 32, 32) + >>> level_outputs = self.forward(inputs) + >>> for level_out in level_outputs: + ... print(tuple(level_out.shape)) + (1, 32, 8, 8) + (1, 64, 4, 4) + (1, 128, 2, 2) + (1, 256, 1, 1) + """ + + blocks_dict = {'BASIC': BasicBlock, 'BOTTLENECK': Bottleneck} + + def __init__(self, + extra, + in_channels=3, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=False, + with_cp=False, + frozen_stages=-1, + zero_init_residual=False, + multiscale_output=True, + pretrained=None, + init_cfg=None): + super(HRNet, self).__init__(init_cfg) + + self.pretrained = pretrained + self.zero_init_residual = zero_init_residual + assert not (init_cfg and pretrained), \ + 'init_cfg and pretrained cannot be setting at the same time' + if isinstance(pretrained, str): + warnings.warn('DeprecationWarning: pretrained is deprecated, ' + 'please use "init_cfg" instead') + self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) + elif pretrained is None: + if init_cfg is None: + self.init_cfg = [ + dict(type='Kaiming', layer='Conv2d'), + dict( + type='Constant', + val=1, + layer=['_BatchNorm', 'GroupNorm']) + ] + else: + raise TypeError('pretrained must be a str or None') + + # Assert configurations of 4 stages are in extra + assert 'stage1' in extra and 'stage2' in extra \ + and 'stage3' in extra and 'stage4' in extra + # Assert whether the length of `num_blocks` and `num_channels` are + # equal to `num_branches` + for i in range(4): + cfg = extra[f'stage{i + 1}'] + assert len(cfg['num_blocks']) == cfg['num_branches'] and \ + len(cfg['num_channels']) == cfg['num_branches'] + + self.extra = extra + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.norm_eval = norm_eval + self.with_cp = with_cp + self.frozen_stages = frozen_stages + + # stem net + self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, 64, postfix=1) + self.norm2_name, norm2 = build_norm_layer(self.norm_cfg, 64, postfix=2) + + self.conv1 = build_conv_layer( + self.conv_cfg, + in_channels, + 64, + kernel_size=3, + stride=2, + padding=1, + bias=False) + + self.add_module(self.norm1_name, norm1) + self.conv2 = build_conv_layer( + self.conv_cfg, + 64, + 64, + kernel_size=3, + stride=2, + padding=1, + bias=False) + + self.add_module(self.norm2_name, norm2) + self.relu = nn.ReLU(inplace=True) + + # stage 1 + self.stage1_cfg = self.extra['stage1'] + num_channels = self.stage1_cfg['num_channels'][0] + block_type = self.stage1_cfg['block'] + num_blocks = self.stage1_cfg['num_blocks'][0] + + block = self.blocks_dict[block_type] + stage1_out_channels = num_channels * block.expansion + self.layer1 = self._make_layer(block, 64, num_channels, num_blocks) + + # stage 2 + self.stage2_cfg = 
self.extra['stage2'] + num_channels = self.stage2_cfg['num_channels'] + block_type = self.stage2_cfg['block'] + + block = self.blocks_dict[block_type] + num_channels = [channel * block.expansion for channel in num_channels] + self.transition1 = self._make_transition_layer([stage1_out_channels], + num_channels) + self.stage2, pre_stage_channels = self._make_stage( + self.stage2_cfg, num_channels) + + # stage 3 + self.stage3_cfg = self.extra['stage3'] + num_channels = self.stage3_cfg['num_channels'] + block_type = self.stage3_cfg['block'] + + block = self.blocks_dict[block_type] + num_channels = [channel * block.expansion for channel in num_channels] + self.transition2 = self._make_transition_layer(pre_stage_channels, + num_channels) + self.stage3, pre_stage_channels = self._make_stage( + self.stage3_cfg, num_channels) + + # stage 4 + self.stage4_cfg = self.extra['stage4'] + num_channels = self.stage4_cfg['num_channels'] + block_type = self.stage4_cfg['block'] + + block = self.blocks_dict[block_type] + num_channels = [channel * block.expansion for channel in num_channels] + self.transition3 = self._make_transition_layer(pre_stage_channels, + num_channels) + self.stage4, pre_stage_channels = self._make_stage( + self.stage4_cfg, num_channels, multiscale_output=multiscale_output) + + self._freeze_stages() + + @property + def norm1(self): + """nn.Module: the normalization layer named "norm1" """ + return getattr(self, self.norm1_name) + + @property + def norm2(self): + """nn.Module: the normalization layer named "norm2" """ + return getattr(self, self.norm2_name) + + def _make_transition_layer(self, num_channels_pre_layer, + num_channels_cur_layer): + """Make transition layer.""" + num_branches_cur = len(num_channels_cur_layer) + num_branches_pre = len(num_channels_pre_layer) + + transition_layers = [] + for i in range(num_branches_cur): + if i < num_branches_pre: + if num_channels_cur_layer[i] != num_channels_pre_layer[i]: + transition_layers.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + num_channels_pre_layer[i], + num_channels_cur_layer[i], + kernel_size=3, + stride=1, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, + num_channels_cur_layer[i])[1], + nn.ReLU(inplace=True))) + else: + transition_layers.append(None) + else: + conv_downsamples = [] + for j in range(i + 1 - num_branches_pre): + in_channels = num_channels_pre_layer[-1] + out_channels = num_channels_cur_layer[i] \ + if j == i - num_branches_pre else in_channels + conv_downsamples.append( + nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels, + out_channels, + kernel_size=3, + stride=2, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, out_channels)[1], + nn.ReLU(inplace=True))) + transition_layers.append(nn.Sequential(*conv_downsamples)) + + return nn.ModuleList(transition_layers) + + def _make_layer(self, block, inplanes, planes, blocks, stride=1): + """Make each layer.""" + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = nn.Sequential( + build_conv_layer( + self.conv_cfg, + inplanes, + planes * block.expansion, + kernel_size=1, + stride=stride, + bias=False), + build_norm_layer(self.norm_cfg, planes * block.expansion)[1]) + + layers = [] + block_init_cfg = None + if self.pretrained is None and not hasattr( + self, 'init_cfg') and self.zero_init_residual: + if block is BasicBlock: + block_init_cfg = dict( + type='Constant', val=0, override=dict(name='norm2')) + elif block is Bottleneck: + block_init_cfg = dict( + type='Constant', 
val=0, override=dict(name='norm3')) + + layers.append( + block( + inplanes, + planes, + stride, + downsample=downsample, + with_cp=self.with_cp, + norm_cfg=self.norm_cfg, + conv_cfg=self.conv_cfg, + init_cfg=block_init_cfg)) + inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append( + block( + inplanes, + planes, + with_cp=self.with_cp, + norm_cfg=self.norm_cfg, + conv_cfg=self.conv_cfg, + init_cfg=block_init_cfg)) + + return Sequential(*layers) + + def _make_stage(self, layer_config, in_channels, multiscale_output=True): + """Make each stage.""" + num_modules = layer_config['num_modules'] + num_branches = layer_config['num_branches'] + num_blocks = layer_config['num_blocks'] + num_channels = layer_config['num_channels'] + block = self.blocks_dict[layer_config['block']] + + hr_modules = [] + block_init_cfg = None + if self.pretrained is None and not hasattr( + self, 'init_cfg') and self.zero_init_residual: + if block is BasicBlock: + block_init_cfg = dict( + type='Constant', val=0, override=dict(name='norm2')) + elif block is Bottleneck: + block_init_cfg = dict( + type='Constant', val=0, override=dict(name='norm3')) + + for i in range(num_modules): + # multi_scale_output is only used for the last module + if not multiscale_output and i == num_modules - 1: + reset_multiscale_output = False + else: + reset_multiscale_output = True + + hr_modules.append( + HRModule( + num_branches, + block, + num_blocks, + in_channels, + num_channels, + reset_multiscale_output, + with_cp=self.with_cp, + norm_cfg=self.norm_cfg, + conv_cfg=self.conv_cfg, + block_init_cfg=block_init_cfg)) + + return Sequential(*hr_modules), in_channels + + def _freeze_stages(self): + """Freeze stages param and norm stats.""" + if self.frozen_stages >= 0: + + self.norm1.eval() + self.norm2.eval() + for m in [self.conv1, self.norm1, self.conv2, self.norm2]: + for param in m.parameters(): + param.requires_grad = False + + for i in range(1, self.frozen_stages + 1): + if i == 1: + m = getattr(self, f'layer{i}') + t = getattr(self, f'transition{i}') + elif i == 4: + m = getattr(self, f'stage{i}') + else: + m = getattr(self, f'stage{i}') + t = getattr(self, f'transition{i}') + m.eval() + for param in m.parameters(): + param.requires_grad = False + t.eval() + for param in t.parameters(): + param.requires_grad = False + + def forward(self, x): + """Forward function.""" + + x = self.conv1(x) + x = self.norm1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.norm2(x) + x = self.relu(x) + x = self.layer1(x) + + x_list = [] + for i in range(self.stage2_cfg['num_branches']): + if self.transition1[i] is not None: + x_list.append(self.transition1[i](x)) + else: + x_list.append(x) + y_list = self.stage2(x_list) + + x_list = [] + for i in range(self.stage3_cfg['num_branches']): + if self.transition2[i] is not None: + x_list.append(self.transition2[i](y_list[-1])) + else: + x_list.append(y_list[i]) + y_list = self.stage3(x_list) + + x_list = [] + for i in range(self.stage4_cfg['num_branches']): + if self.transition3[i] is not None: + x_list.append(self.transition3[i](y_list[-1])) + else: + x_list.append(y_list[i]) + y_list = self.stage4(x_list) + + return y_list + + def train(self, mode=True): + """Convert the model into training mode will keeping the normalization + layer freezed.""" + super(HRNet, self).train(mode) + self._freeze_stages() + if mode and self.norm_eval: + for m in self.modules(): + # trick: eval have effect on BatchNorm only + if isinstance(m, _BatchNorm): + m.eval() diff --git 
a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/icnet.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/icnet.py new file mode 100644 index 0000000..6faaeab --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/icnet.py @@ -0,0 +1,166 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule +from mmcv.runner import BaseModule + +from mmseg.ops import resize +from ..builder import BACKBONES, build_backbone +from ..decode_heads.psp_head import PPM + + +@BACKBONES.register_module() +class ICNet(BaseModule): + """ICNet for Real-Time Semantic Segmentation on High-Resolution Images. + + This backbone is the implementation of + `ICNet `_. + + Args: + backbone_cfg (dict): Config dict to build backbone. Usually it is + ResNet but it can also be other backbones. + in_channels (int): The number of input image channels. Default: 3. + layer_channels (Sequence[int]): The numbers of feature channels at + layer 2 and layer 4 in ResNet. It can also be other backbones. + Default: (512, 2048). + light_branch_middle_channels (int): The number of channels of the + middle layer in light branch. Default: 32. + psp_out_channels (int): The number of channels of the output of PSP + module. Default: 512. + out_channels (Sequence[int]): The numbers of output feature channels + at each branches. Default: (64, 256, 256). + pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module. Default: (1, 2, 3, 6). + conv_cfg (dict): Dictionary to construct and config conv layer. + Default: None. + norm_cfg (dict): Dictionary to construct and config norm layer. + Default: dict(type='BN'). + act_cfg (dict): Dictionary to construct and config act layer. + Default: dict(type='ReLU'). + align_corners (bool): align_corners argument of F.interpolate. + Default: False. + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + """ + + def __init__(self, + backbone_cfg, + in_channels=3, + layer_channels=(512, 2048), + light_branch_middle_channels=32, + psp_out_channels=512, + out_channels=(64, 256, 256), + pool_scales=(1, 2, 3, 6), + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + act_cfg=dict(type='ReLU'), + align_corners=False, + init_cfg=None): + if backbone_cfg is None: + raise TypeError('backbone_cfg must be passed from config file!') + if init_cfg is None: + init_cfg = [ + dict(type='Kaiming', mode='fan_out', layer='Conv2d'), + dict(type='Constant', val=1, layer='_BatchNorm'), + dict(type='Normal', mean=0.01, layer='Linear') + ] + super(ICNet, self).__init__(init_cfg=init_cfg) + self.align_corners = align_corners + self.backbone = build_backbone(backbone_cfg) + + # Note: Default `ceil_mode` is false in nn.MaxPool2d, set + # `ceil_mode=True` to keep information in the corner of feature map. 
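+        # (Illustrative arithmetic for the note above.) Pooling a 64x64
+        # map with kernel 3, stride 2, padding 1 yields
+        # floor((64 + 2 - 3) / 2) + 1 = 32 rows, but with ceil_mode=True
+        # ceil(31.5) + 1 = 33, so the border row/column is not discarded.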
+ self.backbone.maxpool = nn.MaxPool2d( + kernel_size=3, stride=2, padding=1, ceil_mode=True) + + self.psp_modules = PPM( + pool_scales=pool_scales, + in_channels=layer_channels[1], + channels=psp_out_channels, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + align_corners=align_corners) + + self.psp_bottleneck = ConvModule( + layer_channels[1] + len(pool_scales) * psp_out_channels, + psp_out_channels, + 3, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + self.conv_sub1 = nn.Sequential( + ConvModule( + in_channels=in_channels, + out_channels=light_branch_middle_channels, + kernel_size=3, + stride=2, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg), + ConvModule( + in_channels=light_branch_middle_channels, + out_channels=light_branch_middle_channels, + kernel_size=3, + stride=2, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg), + ConvModule( + in_channels=light_branch_middle_channels, + out_channels=out_channels[0], + kernel_size=3, + stride=2, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg)) + + self.conv_sub2 = ConvModule( + layer_channels[0], + out_channels[1], + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg) + + self.conv_sub4 = ConvModule( + psp_out_channels, + out_channels[2], + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg) + + def forward(self, x): + output = [] + + # sub 1 + output.append(self.conv_sub1(x)) + + # sub 2 + x = resize( + x, + scale_factor=0.5, + mode='bilinear', + align_corners=self.align_corners) + x = self.backbone.stem(x) + x = self.backbone.maxpool(x) + x = self.backbone.layer1(x) + x = self.backbone.layer2(x) + output.append(self.conv_sub2(x)) + + # sub 4 + x = resize( + x, + scale_factor=0.5, + mode='bilinear', + align_corners=self.align_corners) + x = self.backbone.layer3(x) + x = self.backbone.layer4(x) + psp_outs = self.psp_modules(x) + [x] + psp_outs = torch.cat(psp_outs, dim=1) + x = self.psp_bottleneck(psp_outs) + + output.append(self.conv_sub4(x)) + + return output diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/mae.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/mae.py new file mode 100644 index 0000000..d3e8754 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/mae.py @@ -0,0 +1,261 @@ +# Copyright (c) OpenMMLab. All rights reserved.import math +import math + +import torch +import torch.nn as nn +from mmcv.cnn.utils.weight_init import (constant_init, kaiming_init, + trunc_normal_) +from mmcv.runner import ModuleList, _load_checkpoint +from torch.nn.modules.batchnorm import _BatchNorm + +from mmseg.utils import get_root_logger +from ..builder import BACKBONES +from .beit import BEiT, BEiTAttention, BEiTTransformerEncoderLayer + + +class MAEAttention(BEiTAttention): + """Multi-head self-attention with relative position bias used in MAE. + + This module is different from ``BEiTAttention`` by initializing the + relative bias table with zeros. + """ + + def init_weights(self): + """Initialize relative position bias with zeros.""" + + # As MAE initializes relative position bias as zeros and this class + # inherited from BEiT which initializes relative position bias + # with `trunc_normal`, `init_weights` here does + # nothing and just passes directly + + pass + + +class MAETransformerEncoderLayer(BEiTTransformerEncoderLayer): + """Implements one encoder layer in Vision Transformer. + + This module is different from ``BEiTTransformerEncoderLayer`` by replacing + ``BEiTAttention`` with ``MAEAttention``. 
+ """ + + def build_attn(self, attn_cfg): + self.attn = MAEAttention(**attn_cfg) + + +@BACKBONES.register_module() +class MAE(BEiT): + """VisionTransformer with support for patch. + + Args: + img_size (int | tuple): Input image size. Default: 224. + patch_size (int): The patch size. Default: 16. + in_channels (int): Number of input channels. Default: 3. + embed_dims (int): embedding dimension. Default: 768. + num_layers (int): depth of transformer. Default: 12. + num_heads (int): number of attention heads. Default: 12. + mlp_ratio (int): ratio of mlp hidden dim to embedding dim. + Default: 4. + out_indices (list | tuple | int): Output from which stages. + Default: -1. + attn_drop_rate (float): The drop out rate for attention layer. + Default 0.0 + drop_path_rate (float): stochastic depth rate. Default 0.0. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN') + act_cfg (dict): The activation config for FFNs. + Default: dict(type='GELU'). + patch_norm (bool): Whether to add a norm in PatchEmbed Block. + Default: False. + final_norm (bool): Whether to add a additional layer to normalize + final feature map. Default: False. + num_fcs (int): The number of fully-connected layers for FFNs. + Default: 2. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. + pretrained (str, optional): model pretrained path. Default: None. + init_values (float): Initialize the values of Attention and FFN + with learnable scaling. Defaults to 0.1. + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + """ + + def __init__(self, + img_size=224, + patch_size=16, + in_channels=3, + embed_dims=768, + num_layers=12, + num_heads=12, + mlp_ratio=4, + out_indices=-1, + attn_drop_rate=0., + drop_path_rate=0., + norm_cfg=dict(type='LN'), + act_cfg=dict(type='GELU'), + patch_norm=False, + final_norm=False, + num_fcs=2, + norm_eval=False, + pretrained=None, + init_values=0.1, + init_cfg=None): + super(MAE, self).__init__( + img_size=img_size, + patch_size=patch_size, + in_channels=in_channels, + embed_dims=embed_dims, + num_layers=num_layers, + num_heads=num_heads, + mlp_ratio=mlp_ratio, + out_indices=out_indices, + qv_bias=False, + attn_drop_rate=attn_drop_rate, + drop_path_rate=drop_path_rate, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + patch_norm=patch_norm, + final_norm=final_norm, + num_fcs=num_fcs, + norm_eval=norm_eval, + pretrained=pretrained, + init_values=init_values, + init_cfg=init_cfg) + + self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dims)) + + self.num_patches = self.patch_shape[0] * self.patch_shape[1] + self.pos_embed = nn.Parameter( + torch.zeros(1, self.num_patches + 1, embed_dims)) + + def _build_layers(self): + dpr = [ + x.item() + for x in torch.linspace(0, self.drop_path_rate, self.num_layers) + ] + self.layers = ModuleList() + for i in range(self.num_layers): + self.layers.append( + MAETransformerEncoderLayer( + embed_dims=self.embed_dims, + num_heads=self.num_heads, + feedforward_channels=self.mlp_ratio * self.embed_dims, + attn_drop_rate=self.attn_drop_rate, + drop_path_rate=dpr[i], + num_fcs=self.num_fcs, + bias=True, + act_cfg=self.act_cfg, + norm_cfg=self.norm_cfg, + window_size=self.patch_shape, + init_values=self.init_values)) + + def fix_init_weight(self): + """Rescale the initialization according to layer id. 
+ + This function is copied from https://github.com/microsoft/unilm/blob/master/beit/modeling_pretrain.py. # noqa: E501 + Copyright (c) Microsoft Corporation + Licensed under the MIT License + """ + + def rescale(param, layer_id): + param.div_(math.sqrt(2.0 * layer_id)) + + for layer_id, layer in enumerate(self.layers): + rescale(layer.attn.proj.weight.data, layer_id + 1) + rescale(layer.ffn.layers[1].weight.data, layer_id + 1) + + def init_weights(self): + + def _init_weights(m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + self.apply(_init_weights) + self.fix_init_weight() + + if (isinstance(self.init_cfg, dict) + and self.init_cfg.get('type') == 'Pretrained'): + logger = get_root_logger() + checkpoint = _load_checkpoint( + self.init_cfg['checkpoint'], logger=logger, map_location='cpu') + state_dict = self.resize_rel_pos_embed(checkpoint) + state_dict = self.resize_abs_pos_embed(state_dict) + self.load_state_dict(state_dict, False) + elif self.init_cfg is not None: + super(MAE, self).init_weights() + else: + # We only implement the 'jax_impl' initialization implemented at + # https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py#L353 # noqa: E501 + # Copyright 2019 Ross Wightman + # Licensed under the Apache License, Version 2.0 (the "License") + trunc_normal_(self.cls_token, std=.02) + for n, m in self.named_modules(): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if m.bias is not None: + if 'ffn' in n: + nn.init.normal_(m.bias, mean=0., std=1e-6) + else: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.Conv2d): + kaiming_init(m, mode='fan_in', bias=0.) + elif isinstance(m, (_BatchNorm, nn.GroupNorm, nn.LayerNorm)): + constant_init(m, val=1.0, bias=0.) 
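+
+    # An absolute position embedding is tied to the pretraining grid,
+    # e.g. (1, 14 * 14 + 1, C) for 224x224 pretraining with patch_size=16.
+    # `resize_abs_pos_embed` keeps the extra (cls) token unchanged and
+    # bicubically interpolates the patch tokens to the current grid,
+    # e.g. (1, 32 * 32 + 1, C) when fine-tuning on 512x512 inputs.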
+ + def resize_abs_pos_embed(self, state_dict): + if 'pos_embed' in state_dict: + pos_embed_checkpoint = state_dict['pos_embed'] + embedding_size = pos_embed_checkpoint.shape[-1] + num_extra_tokens = self.pos_embed.shape[-2] - self.num_patches + # height (== width) for the checkpoint position embedding + orig_size = int( + (pos_embed_checkpoint.shape[-2] - num_extra_tokens)**0.5) + # height (== width) for the new position embedding + new_size = int(self.num_patches**0.5) + # class_token and dist_token are kept unchanged + if orig_size != new_size: + extra_tokens = pos_embed_checkpoint[:, :num_extra_tokens] + # only the position tokens are interpolated + pos_tokens = pos_embed_checkpoint[:, num_extra_tokens:] + pos_tokens = pos_tokens.reshape(-1, orig_size, orig_size, + embedding_size).permute( + 0, 3, 1, 2) + pos_tokens = torch.nn.functional.interpolate( + pos_tokens, + size=(new_size, new_size), + mode='bicubic', + align_corners=False) + pos_tokens = pos_tokens.permute(0, 2, 3, 1).flatten(1, 2) + new_pos_embed = torch.cat((extra_tokens, pos_tokens), dim=1) + state_dict['pos_embed'] = new_pos_embed + return state_dict + + def forward(self, inputs): + B = inputs.shape[0] + + x, hw_shape = self.patch_embed(inputs) + + # stole cls_tokens impl from Phil Wang, thanks + cls_tokens = self.cls_token.expand(B, -1, -1) + x = torch.cat((cls_tokens, x), dim=1) + x = x + self.pos_embed + + outs = [] + for i, layer in enumerate(self.layers): + x = layer(x) + if i == len(self.layers) - 1: + if self.final_norm: + x = self.norm1(x) + if i in self.out_indices: + out = x[:, 1:] + B, _, C = out.shape + out = out.reshape(B, hw_shape[0], hw_shape[1], + C).permute(0, 3, 1, 2).contiguous() + outs.append(out) + + return tuple(outs) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/mit.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/mit.py new file mode 100644 index 0000000..4417cf1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/mit.py @@ -0,0 +1,450 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math +import warnings + +import torch +import torch.nn as nn +import torch.utils.checkpoint as cp +from mmcv.cnn import Conv2d, build_activation_layer, build_norm_layer +from mmcv.cnn.bricks.drop import build_dropout +from mmcv.cnn.bricks.transformer import MultiheadAttention +from mmcv.cnn.utils.weight_init import (constant_init, normal_init, + trunc_normal_init) +from mmcv.runner import BaseModule, ModuleList, Sequential + +from ..builder import BACKBONES +from ..utils import PatchEmbed, nchw_to_nlc, nlc_to_nchw + + +class MixFFN(BaseModule): + """An implementation of MixFFN of Segformer. + + The differences between MixFFN & FFN: + 1. Use 1X1 Conv to replace Linear layer. + 2. Introduce 3X3 Conv to encode positional information. + Args: + embed_dims (int): The feature dimension. Same as + `MultiheadAttention`. Defaults: 256. + feedforward_channels (int): The hidden dimension of FFNs. + Defaults: 1024. + act_cfg (dict, optional): The activation config for FFNs. + Default: dict(type='ReLU') + ffn_drop (float, optional): Probability of an element to be + zeroed in FFN. Default 0.0. + dropout_layer (obj:`ConfigDict`): The dropout_layer used + when adding the shortcut. + init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. + Default: None. 
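+
+    Example (an illustrative sketch with small shapes):
+        >>> import torch
+        >>> ffn = MixFFN(embed_dims=64, feedforward_channels=256)
+        >>> x = torch.rand(1, 16, 64)  # (batch, H*W, C) for hw_shape (4, 4)
+        >>> tuple(ffn(x, (4, 4)).shape)
+        (1, 16, 64)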
+ """ + + def __init__(self, + embed_dims, + feedforward_channels, + act_cfg=dict(type='GELU'), + ffn_drop=0., + dropout_layer=None, + init_cfg=None): + super(MixFFN, self).__init__(init_cfg) + + self.embed_dims = embed_dims + self.feedforward_channels = feedforward_channels + self.act_cfg = act_cfg + self.activate = build_activation_layer(act_cfg) + + in_channels = embed_dims + fc1 = Conv2d( + in_channels=in_channels, + out_channels=feedforward_channels, + kernel_size=1, + stride=1, + bias=True) + # 3x3 depth wise conv to provide positional encode information + pe_conv = Conv2d( + in_channels=feedforward_channels, + out_channels=feedforward_channels, + kernel_size=3, + stride=1, + padding=(3 - 1) // 2, + bias=True, + groups=feedforward_channels) + fc2 = Conv2d( + in_channels=feedforward_channels, + out_channels=in_channels, + kernel_size=1, + stride=1, + bias=True) + drop = nn.Dropout(ffn_drop) + layers = [fc1, pe_conv, self.activate, drop, fc2, drop] + self.layers = Sequential(*layers) + self.dropout_layer = build_dropout( + dropout_layer) if dropout_layer else torch.nn.Identity() + + def forward(self, x, hw_shape, identity=None): + out = nlc_to_nchw(x, hw_shape) + out = self.layers(out) + out = nchw_to_nlc(out) + if identity is None: + identity = x + return identity + self.dropout_layer(out) + + +class EfficientMultiheadAttention(MultiheadAttention): + """An implementation of Efficient Multi-head Attention of Segformer. + + This module is modified from MultiheadAttention which is a module from + mmcv.cnn.bricks.transformer. + Args: + embed_dims (int): The embedding dimension. + num_heads (int): Parallel attention heads. + attn_drop (float): A Dropout layer on attn_output_weights. + Default: 0.0. + proj_drop (float): A Dropout layer after `nn.MultiheadAttention`. + Default: 0.0. + dropout_layer (obj:`ConfigDict`): The dropout_layer used + when adding the shortcut. Default: None. + init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization. + Default: None. + batch_first (bool): Key, Query and Value are shape of + (batch, n, embed_dim) + or (n, batch, embed_dim). Default: False. + qkv_bias (bool): enable bias for qkv if True. Default True. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN'). + sr_ratio (int): The ratio of spatial reduction of Efficient Multi-head + Attention of Segformer. Default: 1. + """ + + def __init__(self, + embed_dims, + num_heads, + attn_drop=0., + proj_drop=0., + dropout_layer=None, + init_cfg=None, + batch_first=True, + qkv_bias=False, + norm_cfg=dict(type='LN'), + sr_ratio=1): + super().__init__( + embed_dims, + num_heads, + attn_drop, + proj_drop, + dropout_layer=dropout_layer, + init_cfg=init_cfg, + batch_first=batch_first, + bias=qkv_bias) + + self.sr_ratio = sr_ratio + if sr_ratio > 1: + self.sr = Conv2d( + in_channels=embed_dims, + out_channels=embed_dims, + kernel_size=sr_ratio, + stride=sr_ratio) + # The ret[0] of build_norm_layer is norm name. + self.norm = build_norm_layer(norm_cfg, embed_dims)[1] + + # handle the BC-breaking from https://github.com/open-mmlab/mmcv/pull/1418 # noqa + from mmseg import digit_version, mmcv_version + if mmcv_version < digit_version('1.3.17'): + warnings.warn('The legacy version of forward function in' + 'EfficientMultiheadAttention is deprecated in' + 'mmcv>=1.3.17 and will no longer support in the' + 'future. 
Please upgrade your mmcv.') + self.forward = self.legacy_forward + + def forward(self, x, hw_shape, identity=None): + + x_q = x + if self.sr_ratio > 1: + x_kv = nlc_to_nchw(x, hw_shape) + x_kv = self.sr(x_kv) + x_kv = nchw_to_nlc(x_kv) + x_kv = self.norm(x_kv) + else: + x_kv = x + + if identity is None: + identity = x_q + + # Because the dataflow('key', 'query', 'value') of + # ``torch.nn.MultiheadAttention`` is (num_query, batch, + # embed_dims), We should adjust the shape of dataflow from + # batch_first (batch, num_query, embed_dims) to num_query_first + # (num_query ,batch, embed_dims), and recover ``attn_output`` + # from num_query_first to batch_first. + if self.batch_first: + x_q = x_q.transpose(0, 1) + x_kv = x_kv.transpose(0, 1) + + out = self.attn(query=x_q, key=x_kv, value=x_kv)[0] + + if self.batch_first: + out = out.transpose(0, 1) + + return identity + self.dropout_layer(self.proj_drop(out)) + + def legacy_forward(self, x, hw_shape, identity=None): + """multi head attention forward in mmcv version < 1.3.17.""" + + x_q = x + if self.sr_ratio > 1: + x_kv = nlc_to_nchw(x, hw_shape) + x_kv = self.sr(x_kv) + x_kv = nchw_to_nlc(x_kv) + x_kv = self.norm(x_kv) + else: + x_kv = x + + if identity is None: + identity = x_q + + # `need_weights=True` will let nn.MultiHeadAttention + # `return attn_output, attn_output_weights.sum(dim=1) / num_heads` + # The `attn_output_weights.sum(dim=1)` may cause cuda error. So, we set + # `need_weights=False` to ignore `attn_output_weights.sum(dim=1)`. + # This issue - `https://github.com/pytorch/pytorch/issues/37583` report + # the error that large scale tensor sum operation may cause cuda error. + out = self.attn(query=x_q, key=x_kv, value=x_kv, need_weights=False)[0] + + return identity + self.dropout_layer(self.proj_drop(out)) + + +class TransformerEncoderLayer(BaseModule): + """Implements one encoder layer in Segformer. + + Args: + embed_dims (int): The feature dimension. + num_heads (int): Parallel attention heads. + feedforward_channels (int): The hidden dimension for FFNs. + drop_rate (float): Probability of an element to be zeroed. + after the feed forward layer. Default 0.0. + attn_drop_rate (float): The drop out rate for attention layer. + Default 0.0. + drop_path_rate (float): stochastic depth rate. Default 0.0. + qkv_bias (bool): enable bias for qkv if True. + Default: True. + act_cfg (dict): The activation config for FFNs. + Default: dict(type='GELU'). + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN'). + batch_first (bool): Key, Query and Value are shape of + (batch, n, embed_dim) + or (n, batch, embed_dim). Default: False. + init_cfg (dict, optional): Initialization config dict. + Default:None. + sr_ratio (int): The ratio of spatial reduction of Efficient Multi-head + Attention of Segformer. Default: 1. + with_cp (bool): Use checkpoint or not. Using checkpoint will save + some memory while slowing down the training speed. Default: False. + """ + + def __init__(self, + embed_dims, + num_heads, + feedforward_channels, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + qkv_bias=True, + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='LN'), + batch_first=True, + sr_ratio=1, + with_cp=False): + super(TransformerEncoderLayer, self).__init__() + + # The ret[0] of build_norm_layer is norm name. 
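+        # Each block is a pre-norm residual unit: x = x + Attn(LN(x)),
+        # then x = x + MixFFN(LN(x)); see `_inner_forward` in `forward`.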
+ self.norm1 = build_norm_layer(norm_cfg, embed_dims)[1] + + self.attn = EfficientMultiheadAttention( + embed_dims=embed_dims, + num_heads=num_heads, + attn_drop=attn_drop_rate, + proj_drop=drop_rate, + dropout_layer=dict(type='DropPath', drop_prob=drop_path_rate), + batch_first=batch_first, + qkv_bias=qkv_bias, + norm_cfg=norm_cfg, + sr_ratio=sr_ratio) + + # The ret[0] of build_norm_layer is norm name. + self.norm2 = build_norm_layer(norm_cfg, embed_dims)[1] + + self.ffn = MixFFN( + embed_dims=embed_dims, + feedforward_channels=feedforward_channels, + ffn_drop=drop_rate, + dropout_layer=dict(type='DropPath', drop_prob=drop_path_rate), + act_cfg=act_cfg) + + self.with_cp = with_cp + + def forward(self, x, hw_shape): + + def _inner_forward(x): + x = self.attn(self.norm1(x), hw_shape, identity=x) + x = self.ffn(self.norm2(x), hw_shape, identity=x) + return x + + if self.with_cp and x.requires_grad: + x = cp.checkpoint(_inner_forward, x) + else: + x = _inner_forward(x) + return x + + +@BACKBONES.register_module() +class MixVisionTransformer(BaseModule): + """The backbone of Segformer. + + This backbone is the implementation of `SegFormer: Simple and + Efficient Design for Semantic Segmentation with + Transformers `_. + Args: + in_channels (int): Number of input channels. Default: 3. + embed_dims (int): Embedding dimension. Default: 768. + num_stags (int): The num of stages. Default: 4. + num_layers (Sequence[int]): The layer number of each transformer encode + layer. Default: [3, 4, 6, 3]. + num_heads (Sequence[int]): The attention heads of each transformer + encode layer. Default: [1, 2, 4, 8]. + patch_sizes (Sequence[int]): The patch_size of each overlapped patch + embedding. Default: [7, 3, 3, 3]. + strides (Sequence[int]): The stride of each overlapped patch embedding. + Default: [4, 2, 2, 2]. + sr_ratios (Sequence[int]): The spatial reduction rate of each + transformer encode layer. Default: [8, 4, 2, 1]. + out_indices (Sequence[int] | int): Output from which stages. + Default: (0, 1, 2, 3). + mlp_ratio (int): ratio of mlp hidden dim to embedding dim. + Default: 4. + qkv_bias (bool): Enable bias for qkv if True. Default: True. + drop_rate (float): Probability of an element to be zeroed. + Default 0.0 + attn_drop_rate (float): The drop out rate for attention layer. + Default 0.0 + drop_path_rate (float): stochastic depth rate. Default 0.0 + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN') + act_cfg (dict): The activation config for FFNs. + Default: dict(type='GELU'). + pretrained (str, optional): model pretrained path. Default: None. + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + with_cp (bool): Use checkpoint or not. Using checkpoint will save + some memory while slowing down the training speed. Default: False. 
+ """ + + def __init__(self, + in_channels=3, + embed_dims=64, + num_stages=4, + num_layers=[3, 4, 6, 3], + num_heads=[1, 2, 4, 8], + patch_sizes=[7, 3, 3, 3], + strides=[4, 2, 2, 2], + sr_ratios=[8, 4, 2, 1], + out_indices=(0, 1, 2, 3), + mlp_ratio=4, + qkv_bias=True, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='LN', eps=1e-6), + pretrained=None, + init_cfg=None, + with_cp=False): + super(MixVisionTransformer, self).__init__(init_cfg=init_cfg) + + assert not (init_cfg and pretrained), \ + 'init_cfg and pretrained cannot be set at the same time' + if isinstance(pretrained, str): + warnings.warn('DeprecationWarning: pretrained is deprecated, ' + 'please use "init_cfg" instead') + self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) + elif pretrained is not None: + raise TypeError('pretrained must be a str or None') + + self.embed_dims = embed_dims + self.num_stages = num_stages + self.num_layers = num_layers + self.num_heads = num_heads + self.patch_sizes = patch_sizes + self.strides = strides + self.sr_ratios = sr_ratios + self.with_cp = with_cp + assert num_stages == len(num_layers) == len(num_heads) \ + == len(patch_sizes) == len(strides) == len(sr_ratios) + + self.out_indices = out_indices + assert max(out_indices) < self.num_stages + + # transformer encoder + dpr = [ + x.item() + for x in torch.linspace(0, drop_path_rate, sum(num_layers)) + ] # stochastic num_layer decay rule + + cur = 0 + self.layers = ModuleList() + for i, num_layer in enumerate(num_layers): + embed_dims_i = embed_dims * num_heads[i] + patch_embed = PatchEmbed( + in_channels=in_channels, + embed_dims=embed_dims_i, + kernel_size=patch_sizes[i], + stride=strides[i], + padding=patch_sizes[i] // 2, + norm_cfg=norm_cfg) + layer = ModuleList([ + TransformerEncoderLayer( + embed_dims=embed_dims_i, + num_heads=num_heads[i], + feedforward_channels=mlp_ratio * embed_dims_i, + drop_rate=drop_rate, + attn_drop_rate=attn_drop_rate, + drop_path_rate=dpr[cur + idx], + qkv_bias=qkv_bias, + act_cfg=act_cfg, + norm_cfg=norm_cfg, + with_cp=with_cp, + sr_ratio=sr_ratios[i]) for idx in range(num_layer) + ]) + in_channels = embed_dims_i + # The ret[0] of build_norm_layer is norm name. + norm = build_norm_layer(norm_cfg, embed_dims_i)[1] + self.layers.append(ModuleList([patch_embed, layer, norm])) + cur += num_layer + + def init_weights(self): + if self.init_cfg is None: + for m in self.modules(): + if isinstance(m, nn.Linear): + trunc_normal_init(m, std=.02, bias=0.) + elif isinstance(m, nn.LayerNorm): + constant_init(m, val=1.0, bias=0.) + elif isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[ + 1] * m.out_channels + fan_out //= m.groups + normal_init( + m, mean=0, std=math.sqrt(2.0 / fan_out), bias=0) + else: + super(MixVisionTransformer, self).init_weights() + + def forward(self, x): + outs = [] + + for i, layer in enumerate(self.layers): + x, hw_shape = layer[0](x) + for block in layer[1]: + x = block(x, hw_shape) + x = layer[2](x) + x = nlc_to_nchw(x, hw_shape) + if i in self.out_indices: + outs.append(x) + + return outs diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/mobilenet_v2.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/mobilenet_v2.py new file mode 100644 index 0000000..cbb9c6c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/mobilenet_v2.py @@ -0,0 +1,197 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import warnings
+
+import torch.nn as nn
+from mmcv.cnn import ConvModule
+from mmcv.runner import BaseModule
+from torch.nn.modules.batchnorm import _BatchNorm
+
+from ..builder import BACKBONES
+from ..utils import InvertedResidual, make_divisible
+
+
+@BACKBONES.register_module()
+class MobileNetV2(BaseModule):
+    """MobileNetV2 backbone.
+
+    This backbone is the implementation of
+    `MobileNetV2: Inverted Residuals and Linear Bottlenecks
+    <https://arxiv.org/abs/1801.04381>`_.
+
+    Args:
+        widen_factor (float): Width multiplier, multiply number of
+            channels in each layer by this amount. Default: 1.0.
+        strides (Sequence[int], optional): Strides of the first block of each
+            layer. If not specified, default config in ``arch_settings`` will
+            be used.
+        dilations (Sequence[int]): Dilation of each layer.
+        out_indices (None or Sequence[int]): Output from which stages.
+            Default: (1, 2, 4, 6).
+        frozen_stages (int): Stages to be frozen (all param fixed).
+            Default: -1, which means not freezing any parameters.
+        conv_cfg (dict): Config dict for convolution layer.
+            Default: None, which means using conv2d.
+        norm_cfg (dict): Config dict for normalization layer.
+            Default: dict(type='BN').
+        act_cfg (dict): Config dict for activation layer.
+            Default: dict(type='ReLU6').
+        norm_eval (bool): Whether to set norm layers to eval mode, namely,
+            freeze running stats (mean and var). Note: Effect on Batch Norm
+            and its variants only. Default: False.
+        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+            memory while slowing down the training speed. Default: False.
+        pretrained (str, optional): model pretrained path. Default: None
+        init_cfg (dict or list[dict], optional): Initialization config dict.
+            Default: None
+    """
+
+    # Parameters to build layers. 3 parameters are needed to construct a
+    # layer, from left to right: expand_ratio, channel, num_blocks.
+    arch_settings = [[1, 16, 1], [6, 24, 2], [6, 32, 3], [6, 64, 4],
+                     [6, 96, 3], [6, 160, 3], [6, 320, 1]]
+
+    def __init__(self,
+                 widen_factor=1.,
+                 strides=(1, 2, 2, 2, 1, 2, 1),
+                 dilations=(1, 1, 1, 1, 1, 1, 1),
+                 out_indices=(1, 2, 4, 6),
+                 frozen_stages=-1,
+                 conv_cfg=None,
+                 norm_cfg=dict(type='BN'),
+                 act_cfg=dict(type='ReLU6'),
+                 norm_eval=False,
+                 with_cp=False,
+                 pretrained=None,
+                 init_cfg=None):
+        super(MobileNetV2, self).__init__(init_cfg)
+
+        self.pretrained = pretrained
+        assert not (init_cfg and pretrained), \
+            'init_cfg and pretrained cannot be set at the same time'
+        if isinstance(pretrained, str):
+            warnings.warn('DeprecationWarning: pretrained is deprecated, '
+                          'please use "init_cfg" instead')
+            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)
+        elif pretrained is None:
+            if init_cfg is None:
+                self.init_cfg = [
+                    dict(type='Kaiming', layer='Conv2d'),
+                    dict(
+                        type='Constant',
+                        val=1,
+                        layer=['_BatchNorm', 'GroupNorm'])
+                ]
+        else:
+            raise TypeError('pretrained must be a str or None')
+
+        self.widen_factor = widen_factor
+        self.strides = strides
+        self.dilations = dilations
+        assert len(strides) == len(dilations) == len(self.arch_settings)
+        self.out_indices = out_indices
+        for index in out_indices:
+            if index not in range(0, 7):
+                raise ValueError('the item in out_indices must be in '
+                                 f'range(0, 7). But received {index}')
+
+        if frozen_stages not in range(-1, 7):
+            raise ValueError('frozen_stages must be in range(-1, 7). 
' + f'But received {frozen_stages}') + self.out_indices = out_indices + self.frozen_stages = frozen_stages + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.norm_eval = norm_eval + self.with_cp = with_cp + + self.in_channels = make_divisible(32 * widen_factor, 8) + + self.conv1 = ConvModule( + in_channels=3, + out_channels=self.in_channels, + kernel_size=3, + stride=2, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + self.layers = [] + + for i, layer_cfg in enumerate(self.arch_settings): + expand_ratio, channel, num_blocks = layer_cfg + stride = self.strides[i] + dilation = self.dilations[i] + out_channels = make_divisible(channel * widen_factor, 8) + inverted_res_layer = self.make_layer( + out_channels=out_channels, + num_blocks=num_blocks, + stride=stride, + dilation=dilation, + expand_ratio=expand_ratio) + layer_name = f'layer{i + 1}' + self.add_module(layer_name, inverted_res_layer) + self.layers.append(layer_name) + + def make_layer(self, out_channels, num_blocks, stride, dilation, + expand_ratio): + """Stack InvertedResidual blocks to build a layer for MobileNetV2. + + Args: + out_channels (int): out_channels of block. + num_blocks (int): Number of blocks. + stride (int): Stride of the first block. + dilation (int): Dilation of the first block. + expand_ratio (int): Expand the number of channels of the + hidden layer in InvertedResidual by this ratio. + """ + layers = [] + for i in range(num_blocks): + layers.append( + InvertedResidual( + self.in_channels, + out_channels, + stride if i == 0 else 1, + expand_ratio=expand_ratio, + dilation=dilation if i == 0 else 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + with_cp=self.with_cp)) + self.in_channels = out_channels + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + + outs = [] + for i, layer_name in enumerate(self.layers): + layer = getattr(self, layer_name) + x = layer(x) + if i in self.out_indices: + outs.append(x) + + if len(outs) == 1: + return outs[0] + else: + return tuple(outs) + + def _freeze_stages(self): + if self.frozen_stages >= 0: + for param in self.conv1.parameters(): + param.requires_grad = False + for i in range(1, self.frozen_stages + 1): + layer = getattr(self, f'layer{i}') + layer.eval() + for param in layer.parameters(): + param.requires_grad = False + + def train(self, mode=True): + super(MobileNetV2, self).train(mode) + self._freeze_stages() + if mode and self.norm_eval: + for m in self.modules(): + if isinstance(m, _BatchNorm): + m.eval() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/mobilenet_v3.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/mobilenet_v3.py new file mode 100644 index 0000000..dd3d6eb --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/mobilenet_v3.py @@ -0,0 +1,267 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings + +import mmcv +from mmcv.cnn import ConvModule +from mmcv.cnn.bricks import Conv2dAdaptivePadding +from mmcv.runner import BaseModule +from torch.nn.modules.batchnorm import _BatchNorm + +from ..builder import BACKBONES +from ..utils import InvertedResidualV3 as InvertedResidual + + +@BACKBONES.register_module() +class MobileNetV3(BaseModule): + """MobileNetV3 backbone. + + This backbone is the improved implementation of `Searching for MobileNetV3 + `_. 
+ + Args: + arch (str): Architecture of mobilnetv3, from {'small', 'large'}. + Default: 'small'. + conv_cfg (dict): Config dict for convolution layer. + Default: None, which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + out_indices (tuple[int]): Output from which layer. + Default: (0, 1, 12). + frozen_stages (int): Stages to be frozen (all param fixed). + Default: -1, which means not freezing any parameters. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. + with_cp (bool): Use checkpoint or not. Using checkpoint will save + some memory while slowing down the training speed. + Default: False. + pretrained (str, optional): model pretrained path. Default: None + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None + """ + # Parameters to build each block: + # [kernel size, mid channels, out channels, with_se, act type, stride] + arch_settings = { + 'small': [[3, 16, 16, True, 'ReLU', 2], # block0 layer1 os=4 + [3, 72, 24, False, 'ReLU', 2], # block1 layer2 os=8 + [3, 88, 24, False, 'ReLU', 1], + [5, 96, 40, True, 'HSwish', 2], # block2 layer4 os=16 + [5, 240, 40, True, 'HSwish', 1], + [5, 240, 40, True, 'HSwish', 1], + [5, 120, 48, True, 'HSwish', 1], # block3 layer7 os=16 + [5, 144, 48, True, 'HSwish', 1], + [5, 288, 96, True, 'HSwish', 2], # block4 layer9 os=32 + [5, 576, 96, True, 'HSwish', 1], + [5, 576, 96, True, 'HSwish', 1]], + 'large': [[3, 16, 16, False, 'ReLU', 1], # block0 layer1 os=2 + [3, 64, 24, False, 'ReLU', 2], # block1 layer2 os=4 + [3, 72, 24, False, 'ReLU', 1], + [5, 72, 40, True, 'ReLU', 2], # block2 layer4 os=8 + [5, 120, 40, True, 'ReLU', 1], + [5, 120, 40, True, 'ReLU', 1], + [3, 240, 80, False, 'HSwish', 2], # block3 layer7 os=16 + [3, 200, 80, False, 'HSwish', 1], + [3, 184, 80, False, 'HSwish', 1], + [3, 184, 80, False, 'HSwish', 1], + [3, 480, 112, True, 'HSwish', 1], # block4 layer11 os=16 + [3, 672, 112, True, 'HSwish', 1], + [5, 672, 160, True, 'HSwish', 2], # block5 layer13 os=32 + [5, 960, 160, True, 'HSwish', 1], + [5, 960, 160, True, 'HSwish', 1]] + } # yapf: disable + + def __init__(self, + arch='small', + conv_cfg=None, + norm_cfg=dict(type='BN'), + out_indices=(0, 1, 12), + frozen_stages=-1, + reduction_factor=1, + norm_eval=False, + with_cp=False, + pretrained=None, + init_cfg=None): + super(MobileNetV3, self).__init__(init_cfg) + + self.pretrained = pretrained + assert not (init_cfg and pretrained), \ + 'init_cfg and pretrained cannot be setting at the same time' + if isinstance(pretrained, str): + warnings.warn('DeprecationWarning: pretrained is a deprecated, ' + 'please use "init_cfg" instead') + self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) + elif pretrained is None: + if init_cfg is None: + self.init_cfg = [ + dict(type='Kaiming', layer='Conv2d'), + dict( + type='Constant', + val=1, + layer=['_BatchNorm', 'GroupNorm']) + ] + else: + raise TypeError('pretrained must be a str or None') + + assert arch in self.arch_settings + assert isinstance(reduction_factor, int) and reduction_factor > 0 + assert mmcv.is_tuple_of(out_indices, int) + for index in out_indices: + if index not in range(0, len(self.arch_settings[arch]) + 2): + raise ValueError( + 'the item in out_indices must in ' + f'range(0, {len(self.arch_settings[arch])+2}). 
' + f'But received {index}') + + if frozen_stages not in range(-1, len(self.arch_settings[arch]) + 2): + raise ValueError('frozen_stages must be in range(-1, ' + f'{len(self.arch_settings[arch])+2}). ' + f'But received {frozen_stages}') + self.arch = arch + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.out_indices = out_indices + self.frozen_stages = frozen_stages + self.reduction_factor = reduction_factor + self.norm_eval = norm_eval + self.with_cp = with_cp + self.layers = self._make_layer() + + def _make_layer(self): + layers = [] + + # build the first layer (layer0) + in_channels = 16 + layer = ConvModule( + in_channels=3, + out_channels=in_channels, + kernel_size=3, + stride=2, + padding=1, + conv_cfg=dict(type='Conv2dAdaptivePadding'), + norm_cfg=self.norm_cfg, + act_cfg=dict(type='HSwish')) + self.add_module('layer0', layer) + layers.append('layer0') + + layer_setting = self.arch_settings[self.arch] + for i, params in enumerate(layer_setting): + (kernel_size, mid_channels, out_channels, with_se, act, + stride) = params + + if self.arch == 'large' and i >= 12 or self.arch == 'small' and \ + i >= 8: + mid_channels = mid_channels // self.reduction_factor + out_channels = out_channels // self.reduction_factor + + if with_se: + se_cfg = dict( + channels=mid_channels, + ratio=4, + act_cfg=(dict(type='ReLU'), + dict(type='HSigmoid', bias=3.0, divisor=6.0))) + else: + se_cfg = None + + layer = InvertedResidual( + in_channels=in_channels, + out_channels=out_channels, + mid_channels=mid_channels, + kernel_size=kernel_size, + stride=stride, + se_cfg=se_cfg, + with_expand_conv=(in_channels != mid_channels), + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=dict(type=act), + with_cp=self.with_cp) + in_channels = out_channels + layer_name = 'layer{}'.format(i + 1) + self.add_module(layer_name, layer) + layers.append(layer_name) + + # build the last layer + # block5 layer12 os=32 for small model + # block6 layer16 os=32 for large model + layer = ConvModule( + in_channels=in_channels, + out_channels=576 if self.arch == 'small' else 960, + kernel_size=1, + stride=1, + dilation=4, + padding=0, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=dict(type='HSwish')) + layer_name = 'layer{}'.format(len(layer_setting) + 1) + self.add_module(layer_name, layer) + layers.append(layer_name) + + # next, convert backbone MobileNetV3 to a semantic segmentation version + if self.arch == 'small': + self.layer4.depthwise_conv.conv.stride = (1, 1) + self.layer9.depthwise_conv.conv.stride = (1, 1) + for i in range(4, len(layers)): + layer = getattr(self, layers[i]) + if isinstance(layer, InvertedResidual): + modified_module = layer.depthwise_conv.conv + else: + modified_module = layer.conv + + if i < 9: + modified_module.dilation = (2, 2) + pad = 2 + else: + modified_module.dilation = (4, 4) + pad = 4 + + if not isinstance(modified_module, Conv2dAdaptivePadding): + # Adjust padding + pad *= (modified_module.kernel_size[0] - 1) // 2 + modified_module.padding = (pad, pad) + else: + self.layer7.depthwise_conv.conv.stride = (1, 1) + self.layer13.depthwise_conv.conv.stride = (1, 1) + for i in range(7, len(layers)): + layer = getattr(self, layers[i]) + if isinstance(layer, InvertedResidual): + modified_module = layer.depthwise_conv.conv + else: + modified_module = layer.conv + + if i < 13: + modified_module.dilation = (2, 2) + pad = 2 + else: + modified_module.dilation = (4, 4) + pad = 4 + + if not isinstance(modified_module, Conv2dAdaptivePadding): + # Adjust padding + pad *= 
(modified_module.kernel_size[0] - 1) // 2 + modified_module.padding = (pad, pad) + + return layers + + def forward(self, x): + outs = [] + for i, layer_name in enumerate(self.layers): + layer = getattr(self, layer_name) + x = layer(x) + if i in self.out_indices: + outs.append(x) + return outs + + def _freeze_stages(self): + for i in range(self.frozen_stages + 1): + layer = getattr(self, f'layer{i}') + layer.eval() + for param in layer.parameters(): + param.requires_grad = False + + def train(self, mode=True): + super(MobileNetV3, self).train(mode) + self._freeze_stages() + if mode and self.norm_eval: + for m in self.modules(): + if isinstance(m, _BatchNorm): + m.eval() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/resnest.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/resnest.py new file mode 100644 index 0000000..7e54e78 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/resnest.py @@ -0,0 +1,322 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint as cp +from mmcv.cnn import build_conv_layer, build_norm_layer + +from ..builder import BACKBONES +from ..utils import ResLayer +from .resnet import Bottleneck as _Bottleneck +from .resnet import ResNetV1d + + +class RSoftmax(nn.Module): + """Radix Softmax module in ``SplitAttentionConv2d``. + + Args: + radix (int): Radix of input. + groups (int): Groups of input. + """ + + def __init__(self, radix, groups): + super().__init__() + self.radix = radix + self.groups = groups + + def forward(self, x): + batch = x.size(0) + if self.radix > 1: + x = x.view(batch, self.groups, self.radix, -1).transpose(1, 2) + x = F.softmax(x, dim=1) + x = x.reshape(batch, -1) + else: + x = torch.sigmoid(x) + return x + + +class SplitAttentionConv2d(nn.Module): + """Split-Attention Conv2d in ResNeSt. + + Args: + in_channels (int): Same as nn.Conv2d. + out_channels (int): Same as nn.Conv2d. + kernel_size (int | tuple[int]): Same as nn.Conv2d. + stride (int | tuple[int]): Same as nn.Conv2d. + padding (int | tuple[int]): Same as nn.Conv2d. + dilation (int | tuple[int]): Same as nn.Conv2d. + groups (int): Same as nn.Conv2d. + radix (int): Radix of SpltAtConv2d. Default: 2 + reduction_factor (int): Reduction factor of inter_channels. Default: 4. + conv_cfg (dict): Config dict for convolution layer. Default: None, + which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. Default: None. + dcn (dict): Config dict for DCN. Default: None. 
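+
+    Example (an illustrative sketch; the channel count is preserved):
+        >>> import torch
+        >>> conv = SplitAttentionConv2d(64, 64, 3, padding=1)
+        >>> x = torch.rand(2, 64, 56, 56)
+        >>> tuple(conv(x).shape)
+        (2, 64, 56, 56)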
+ """ + + def __init__(self, + in_channels, + channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + radix=2, + reduction_factor=4, + conv_cfg=None, + norm_cfg=dict(type='BN'), + dcn=None): + super(SplitAttentionConv2d, self).__init__() + inter_channels = max(in_channels * radix // reduction_factor, 32) + self.radix = radix + self.groups = groups + self.channels = channels + self.with_dcn = dcn is not None + self.dcn = dcn + fallback_on_stride = False + if self.with_dcn: + fallback_on_stride = self.dcn.pop('fallback_on_stride', False) + if self.with_dcn and not fallback_on_stride: + assert conv_cfg is None, 'conv_cfg must be None for DCN' + conv_cfg = dcn + self.conv = build_conv_layer( + conv_cfg, + in_channels, + channels * radix, + kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups * radix, + bias=False) + self.norm0_name, norm0 = build_norm_layer( + norm_cfg, channels * radix, postfix=0) + self.add_module(self.norm0_name, norm0) + self.relu = nn.ReLU(inplace=True) + self.fc1 = build_conv_layer( + None, channels, inter_channels, 1, groups=self.groups) + self.norm1_name, norm1 = build_norm_layer( + norm_cfg, inter_channels, postfix=1) + self.add_module(self.norm1_name, norm1) + self.fc2 = build_conv_layer( + None, inter_channels, channels * radix, 1, groups=self.groups) + self.rsoftmax = RSoftmax(radix, groups) + + @property + def norm0(self): + """nn.Module: the normalization layer named "norm0" """ + return getattr(self, self.norm0_name) + + @property + def norm1(self): + """nn.Module: the normalization layer named "norm1" """ + return getattr(self, self.norm1_name) + + def forward(self, x): + x = self.conv(x) + x = self.norm0(x) + x = self.relu(x) + + batch, rchannel = x.shape[:2] + batch = x.size(0) + if self.radix > 1: + splits = x.view(batch, self.radix, -1, *x.shape[2:]) + gap = splits.sum(dim=1) + else: + gap = x + gap = F.adaptive_avg_pool2d(gap, 1) + gap = self.fc1(gap) + + gap = self.norm1(gap) + gap = self.relu(gap) + + atten = self.fc2(gap) + atten = self.rsoftmax(atten).view(batch, -1, 1, 1) + + if self.radix > 1: + attens = atten.view(batch, self.radix, -1, *atten.shape[2:]) + out = torch.sum(attens * splits, dim=1) + else: + out = atten * x + return out.contiguous() + + +class Bottleneck(_Bottleneck): + """Bottleneck block for ResNeSt. + + Args: + inplane (int): Input planes of this block. + planes (int): Middle planes of this block. + groups (int): Groups of conv2. + width_per_group (int): Width per group of conv2. 64x4d indicates + ``groups=64, width_per_group=4`` and 32x8d indicates + ``groups=32, width_per_group=8``. + radix (int): Radix of SpltAtConv2d. Default: 2 + reduction_factor (int): Reduction factor of inter_channels in + SplitAttentionConv2d. Default: 4. + avg_down_stride (bool): Whether to use average pool for stride in + Bottleneck. Default: True. + kwargs (dict): Key word arguments for base class. 
+ """ + expansion = 4 + + def __init__(self, + inplanes, + planes, + groups=1, + base_width=4, + base_channels=64, + radix=2, + reduction_factor=4, + avg_down_stride=True, + **kwargs): + """Bottleneck block for ResNeSt.""" + super(Bottleneck, self).__init__(inplanes, planes, **kwargs) + + if groups == 1: + width = self.planes + else: + width = math.floor(self.planes * + (base_width / base_channels)) * groups + + self.avg_down_stride = avg_down_stride and self.conv2_stride > 1 + + self.norm1_name, norm1 = build_norm_layer( + self.norm_cfg, width, postfix=1) + self.norm3_name, norm3 = build_norm_layer( + self.norm_cfg, self.planes * self.expansion, postfix=3) + + self.conv1 = build_conv_layer( + self.conv_cfg, + self.inplanes, + width, + kernel_size=1, + stride=self.conv1_stride, + bias=False) + self.add_module(self.norm1_name, norm1) + self.with_modulated_dcn = False + self.conv2 = SplitAttentionConv2d( + width, + width, + kernel_size=3, + stride=1 if self.avg_down_stride else self.conv2_stride, + padding=self.dilation, + dilation=self.dilation, + groups=groups, + radix=radix, + reduction_factor=reduction_factor, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + dcn=self.dcn) + delattr(self, self.norm2_name) + + if self.avg_down_stride: + self.avd_layer = nn.AvgPool2d(3, self.conv2_stride, padding=1) + + self.conv3 = build_conv_layer( + self.conv_cfg, + width, + self.planes * self.expansion, + kernel_size=1, + bias=False) + self.add_module(self.norm3_name, norm3) + + def forward(self, x): + + def _inner_forward(x): + identity = x + + out = self.conv1(x) + out = self.norm1(out) + out = self.relu(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv1_plugin_names) + + out = self.conv2(out) + + if self.avg_down_stride: + out = self.avd_layer(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv2_plugin_names) + + out = self.conv3(out) + out = self.norm3(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv3_plugin_names) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + out = self.relu(out) + + return out + + +@BACKBONES.register_module() +class ResNeSt(ResNetV1d): + """ResNeSt backbone. + + This backbone is the implementation of `ResNeSt: + Split-Attention Networks `_. + + Args: + groups (int): Number of groups of Bottleneck. Default: 1 + base_width (int): Base width of Bottleneck. Default: 4 + radix (int): Radix of SpltAtConv2d. Default: 2 + reduction_factor (int): Reduction factor of inter_channels in + SplitAttentionConv2d. Default: 4. + avg_down_stride (bool): Whether to use average pool for stride in + Bottleneck. Default: True. + kwargs (dict): Keyword arguments for ResNet. 
+ """ + + arch_settings = { + 50: (Bottleneck, (3, 4, 6, 3)), + 101: (Bottleneck, (3, 4, 23, 3)), + 152: (Bottleneck, (3, 8, 36, 3)), + 200: (Bottleneck, (3, 24, 36, 3)) + } + + def __init__(self, + groups=1, + base_width=4, + radix=2, + reduction_factor=4, + avg_down_stride=True, + **kwargs): + self.groups = groups + self.base_width = base_width + self.radix = radix + self.reduction_factor = reduction_factor + self.avg_down_stride = avg_down_stride + super(ResNeSt, self).__init__(**kwargs) + + def make_res_layer(self, **kwargs): + """Pack all blocks in a stage into a ``ResLayer``.""" + return ResLayer( + groups=self.groups, + base_width=self.base_width, + base_channels=self.base_channels, + radix=self.radix, + reduction_factor=self.reduction_factor, + avg_down_stride=self.avg_down_stride, + **kwargs) + + + + diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/resnet.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/resnet.py new file mode 100644 index 0000000..e8b961d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/resnet.py @@ -0,0 +1,714 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings + +import torch.nn as nn +import torch.utils.checkpoint as cp +from mmcv.cnn import build_conv_layer, build_norm_layer, build_plugin_layer +from mmcv.runner import BaseModule +from mmcv.utils.parrots_wrapper import _BatchNorm + +from ..builder import BACKBONES +from ..utils import ResLayer + + +class BasicBlock(BaseModule): + """Basic block for ResNet.""" + + expansion = 1 + + def __init__(self, + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + style='pytorch', + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + dcn=None, + plugins=None, + init_cfg=None): + super(BasicBlock, self).__init__(init_cfg) + assert dcn is None, 'Not implemented yet.' + assert plugins is None, 'Not implemented yet.' + + self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1) + self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2) + + self.conv1 = build_conv_layer( + conv_cfg, + inplanes, + planes, + 3, + stride=stride, + padding=dilation, + dilation=dilation, + bias=False) + self.add_module(self.norm1_name, norm1) + self.conv2 = build_conv_layer( + conv_cfg, planes, planes, 3, padding=1, bias=False) + self.add_module(self.norm2_name, norm2) + + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + self.dilation = dilation + self.with_cp = with_cp + + @property + def norm1(self): + """nn.Module: normalization layer after the first convolution layer""" + return getattr(self, self.norm1_name) + + @property + def norm2(self): + """nn.Module: normalization layer after the second convolution layer""" + return getattr(self, self.norm2_name) + + def forward(self, x): + """Forward function.""" + + def _inner_forward(x): + identity = x + + out = self.conv1(x) + out = self.norm1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.norm2(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + out = self.relu(out) + + return out + + +class Bottleneck(BaseModule): + """Bottleneck block for ResNet. + + If style is "pytorch", the stride-two layer is the 3x3 conv layer, if it is + "caffe", the stride-two layer is the first 1x1 conv layer. 
+ """ + + expansion = 4 + + def __init__(self, + inplanes, + planes, + stride=1, + dilation=1, + downsample=None, + style='pytorch', + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + dcn=None, + plugins=None, + init_cfg=None): + super(Bottleneck, self).__init__(init_cfg) + assert style in ['pytorch', 'caffe'] + assert dcn is None or isinstance(dcn, dict) + assert plugins is None or isinstance(plugins, list) + if plugins is not None: + allowed_position = ['after_conv1', 'after_conv2', 'after_conv3'] + assert all(p['position'] in allowed_position for p in plugins) + + self.inplanes = inplanes + self.planes = planes + self.stride = stride + self.dilation = dilation + self.style = style + self.with_cp = with_cp + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.dcn = dcn + self.with_dcn = dcn is not None + self.plugins = plugins + self.with_plugins = plugins is not None + + if self.with_plugins: + # collect plugins for conv1/conv2/conv3 + self.after_conv1_plugins = [ + plugin['cfg'] for plugin in plugins + if plugin['position'] == 'after_conv1' + ] + self.after_conv2_plugins = [ + plugin['cfg'] for plugin in plugins + if plugin['position'] == 'after_conv2' + ] + self.after_conv3_plugins = [ + plugin['cfg'] for plugin in plugins + if plugin['position'] == 'after_conv3' + ] + + if self.style == 'pytorch': + self.conv1_stride = 1 + self.conv2_stride = stride + else: + self.conv1_stride = stride + self.conv2_stride = 1 + + self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1) + self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2) + self.norm3_name, norm3 = build_norm_layer( + norm_cfg, planes * self.expansion, postfix=3) + + self.conv1 = build_conv_layer( + conv_cfg, + inplanes, + planes, + kernel_size=1, + stride=self.conv1_stride, + bias=False) + self.add_module(self.norm1_name, norm1) + fallback_on_stride = False + if self.with_dcn: + fallback_on_stride = dcn.pop('fallback_on_stride', False) + if not self.with_dcn or fallback_on_stride: + self.conv2 = build_conv_layer( + conv_cfg, + planes, + planes, + kernel_size=3, + stride=self.conv2_stride, + padding=dilation, + dilation=dilation, + bias=False) + else: + assert self.conv_cfg is None, 'conv_cfg must be None for DCN' + self.conv2 = build_conv_layer( + dcn, + planes, + planes, + kernel_size=3, + stride=self.conv2_stride, + padding=dilation, + dilation=dilation, + bias=False) + + self.add_module(self.norm2_name, norm2) + self.conv3 = build_conv_layer( + conv_cfg, + planes, + planes * self.expansion, + kernel_size=1, + bias=False) + self.add_module(self.norm3_name, norm3) + + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + + if self.with_plugins: + self.after_conv1_plugin_names = self.make_block_plugins( + planes, self.after_conv1_plugins) + self.after_conv2_plugin_names = self.make_block_plugins( + planes, self.after_conv2_plugins) + self.after_conv3_plugin_names = self.make_block_plugins( + planes * self.expansion, self.after_conv3_plugins) + + def make_block_plugins(self, in_channels, plugins): + """make plugins for block. + + Args: + in_channels (int): Input channels of plugin. + plugins (list[dict]): List of plugins cfg to build. + + Returns: + list[str]: List of the names of plugin. 
+ """ + assert isinstance(plugins, list) + plugin_names = [] + for plugin in plugins: + plugin = plugin.copy() + name, layer = build_plugin_layer( + plugin, + in_channels=in_channels, + postfix=plugin.pop('postfix', '')) + assert not hasattr(self, name), f'duplicate plugin {name}' + self.add_module(name, layer) + plugin_names.append(name) + return plugin_names + + def forward_plugin(self, x, plugin_names): + """Forward function for plugins.""" + out = x + for name in plugin_names: + out = getattr(self, name)(x) + return out + + @property + def norm1(self): + """nn.Module: normalization layer after the first convolution layer""" + return getattr(self, self.norm1_name) + + @property + def norm2(self): + """nn.Module: normalization layer after the second convolution layer""" + return getattr(self, self.norm2_name) + + @property + def norm3(self): + """nn.Module: normalization layer after the third convolution layer""" + return getattr(self, self.norm3_name) + + def forward(self, x): + """Forward function.""" + + def _inner_forward(x): + identity = x + + out = self.conv1(x) + out = self.norm1(out) + out = self.relu(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv1_plugin_names) + + out = self.conv2(out) + out = self.norm2(out) + out = self.relu(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv2_plugin_names) + + out = self.conv3(out) + out = self.norm3(out) + + if self.with_plugins: + out = self.forward_plugin(out, self.after_conv3_plugin_names) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + out = self.relu(out) + + return out + + +@BACKBONES.register_module() +class ResNet(BaseModule): + """ResNet backbone. + + This backbone is the improved implementation of `Deep Residual Learning + for Image Recognition `_. + + Args: + depth (int): Depth of resnet, from {18, 34, 50, 101, 152}. + in_channels (int): Number of input image channels. Default: 3. + stem_channels (int): Number of stem channels. Default: 64. + base_channels (int): Number of base channels of res layer. Default: 64. + num_stages (int): Resnet stages, normally 4. Default: 4. + strides (Sequence[int]): Strides of the first block of each stage. + Default: (1, 2, 2, 2). + dilations (Sequence[int]): Dilation of each stage. + Default: (1, 1, 1, 1). + out_indices (Sequence[int]): Output from which stages. + Default: (0, 1, 2, 3). + style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two + layer is the 3x3 conv layer, otherwise the stride-two layer is + the first 1x1 conv layer. Default: 'pytorch'. + deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv. + Default: False. + avg_down (bool): Use AvgPool instead of stride conv when + downsampling in the bottleneck. Default: False. + frozen_stages (int): Stages to be frozen (stop grad and set eval mode). + -1 means not freezing any parameters. Default: -1. + conv_cfg (dict | None): Dictionary to construct and config conv layer. + When conv_cfg is None, cfg will be set to dict(type='Conv2d'). + Default: None. + norm_cfg (dict): Dictionary to construct and config norm layer. + Default: dict(type='BN', requires_grad=True). + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. 
+ dcn (dict | None): Dictionary to construct and config DCN conv layer. + When dcn is not None, conv_cfg must be None. Default: None. + stage_with_dcn (Sequence[bool]): Whether to set DCN conv for each + stage. The length of stage_with_dcn is equal to num_stages. + Default: (False, False, False, False). + plugins (list[dict]): List of plugins for stages, each dict contains: + + - cfg (dict, required): Cfg dict to build plugin. + + - position (str, required): Position inside block to insert plugin, + options: 'after_conv1', 'after_conv2', 'after_conv3'. + + - stages (tuple[bool], optional): Stages to apply plugin, length + should be same as 'num_stages'. + Default: None. + multi_grid (Sequence[int]|None): Multi grid dilation rates of last + stage. Default: None. + contract_dilation (bool): Whether contract first dilation of each layer + Default: False. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + zero_init_residual (bool): Whether to use zero init for last norm layer + in resblocks to let them behave as identity. Default: True. + pretrained (str, optional): model pretrained path. Default: None. + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + + Example: + >>> from mmseg.models import ResNet + >>> import torch + >>> self = ResNet(depth=18) + >>> self.eval() + >>> inputs = torch.rand(1, 3, 32, 32) + >>> level_outputs = self.forward(inputs) + >>> for level_out in level_outputs: + ... print(tuple(level_out.shape)) + (1, 64, 8, 8) + (1, 128, 4, 4) + (1, 256, 2, 2) + (1, 512, 1, 1) + """ + + arch_settings = { + 18: (BasicBlock, (2, 2, 2, 2)), + 34: (BasicBlock, (3, 4, 6, 3)), + 50: (Bottleneck, (3, 4, 6, 3)), + 101: (Bottleneck, (3, 4, 23, 3)), + 152: (Bottleneck, (3, 8, 36, 3)) + } + + def __init__(self, + depth, + in_channels=3, + stem_channels=64, + base_channels=64, + num_stages=4, + strides=(1, 2, 2, 2), + dilations=(1, 1, 1, 1), + out_indices=(0, 1, 2, 3), + style='pytorch', + deep_stem=False, + avg_down=False, + frozen_stages=-1, + conv_cfg=None, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=False, + dcn=None, + stage_with_dcn=(False, False, False, False), + plugins=None, + multi_grid=None, + contract_dilation=False, + with_cp=False, + zero_init_residual=True, + pretrained=None, + init_cfg=None): + super(ResNet, self).__init__(init_cfg) + if depth not in self.arch_settings: + raise KeyError(f'invalid depth {depth} for resnet') + + self.pretrained = pretrained + self.zero_init_residual = zero_init_residual + block_init_cfg = None + assert not (init_cfg and pretrained), \ + 'init_cfg and pretrained cannot be setting at the same time' + if isinstance(pretrained, str): + warnings.warn('DeprecationWarning: pretrained is a deprecated, ' + 'please use "init_cfg" instead') + self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) + elif pretrained is None: + if init_cfg is None: + self.init_cfg = [ + dict(type='Kaiming', layer='Conv2d'), + dict( + type='Constant', + val=1, + layer=['_BatchNorm', 'GroupNorm']) + ] + block = self.arch_settings[depth][0] + if self.zero_init_residual: + if block is BasicBlock: + block_init_cfg = dict( + type='Constant', + val=0, + override=dict(name='norm2')) + elif block is Bottleneck: + block_init_cfg = dict( + type='Constant', + val=0, + override=dict(name='norm3')) + else: + raise TypeError('pretrained must be a str or None') + + self.depth = depth + self.stem_channels = stem_channels + self.base_channels = 
base_channels + self.num_stages = num_stages + assert num_stages >= 1 and num_stages <= 4 + self.strides = strides + self.dilations = dilations + assert len(strides) == len(dilations) == num_stages + self.out_indices = out_indices + assert max(out_indices) < num_stages + self.style = style + self.deep_stem = deep_stem + self.avg_down = avg_down + self.frozen_stages = frozen_stages + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.with_cp = with_cp + self.norm_eval = norm_eval + self.dcn = dcn + self.stage_with_dcn = stage_with_dcn + if dcn is not None: + assert len(stage_with_dcn) == num_stages + self.plugins = plugins + self.multi_grid = multi_grid + self.contract_dilation = contract_dilation + self.block, stage_blocks = self.arch_settings[depth] + self.stage_blocks = stage_blocks[:num_stages] + self.inplanes = stem_channels + + self._make_stem_layer(in_channels, stem_channels) + + self.res_layers = [] + for i, num_blocks in enumerate(self.stage_blocks): + stride = strides[i] + dilation = dilations[i] + dcn = self.dcn if self.stage_with_dcn[i] else None + if plugins is not None: + stage_plugins = self.make_stage_plugins(plugins, i) + else: + stage_plugins = None + # multi grid is applied to last layer only + stage_multi_grid = multi_grid if i == len( + self.stage_blocks) - 1 else None + planes = base_channels * 2**i + res_layer = self.make_res_layer( + block=self.block, + inplanes=self.inplanes, + planes=planes, + num_blocks=num_blocks, + stride=stride, + dilation=dilation, + style=self.style, + avg_down=self.avg_down, + with_cp=with_cp, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + dcn=dcn, + plugins=stage_plugins, + multi_grid=stage_multi_grid, + contract_dilation=contract_dilation, + init_cfg=block_init_cfg) + self.inplanes = planes * self.block.expansion + layer_name = f'layer{i+1}' + self.add_module(layer_name, res_layer) + self.res_layers.append(layer_name) + + self._freeze_stages() + + self.feat_dim = self.block.expansion * base_channels * 2**( + len(self.stage_blocks) - 1) + + def make_stage_plugins(self, plugins, stage_idx): + """make plugins for ResNet 'stage_idx'th stage . + + Currently we support to insert 'context_block', + 'empirical_attention_block', 'nonlocal_block' into the backbone like + ResNet/ResNeXt. They could be inserted after conv1/conv2/conv3 of + Bottleneck. + + An example of plugins format could be : + >>> plugins=[ + ... dict(cfg=dict(type='xxx', arg1='xxx'), + ... stages=(False, True, True, True), + ... position='after_conv2'), + ... dict(cfg=dict(type='yyy'), + ... stages=(True, True, True, True), + ... position='after_conv3'), + ... dict(cfg=dict(type='zzz', postfix='1'), + ... stages=(True, True, True, True), + ... position='after_conv3'), + ... dict(cfg=dict(type='zzz', postfix='2'), + ... stages=(True, True, True, True), + ... position='after_conv3') + ... ] + >>> self = ResNet(depth=18) + >>> stage_plugins = self.make_stage_plugins(plugins, 0) + >>> assert len(stage_plugins) == 3 + + Suppose 'stage_idx=0', the structure of blocks in the stage would be: + conv1-> conv2->conv3->yyy->zzz1->zzz2 + Suppose 'stage_idx=1', the structure of blocks in the stage would be: + conv1-> conv2->xxx->conv3->yyy->zzz1->zzz2 + + If stages is missing, the plugin would be applied to all stages. + + Args: + plugins (list[dict]): List of plugins cfg to build. The postfix is + required if multiple same type plugins are inserted. 
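To make the channel bookkeeping of the stage loop above concrete: with the default base_channels=64 and Bottleneck blocks (expansion=4), each stage doubles `planes` and the residual output is `planes * expansion`. Worked out:

base_channels, expansion = 64, 4
for i in range(4):
    planes = base_channels * 2 ** i    # 64, 128, 256, 512
    print(planes, planes * expansion)  # outputs: 256, 512, 1024, 2048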
+ stage_idx (int): Index of stage to build + + Returns: + list[dict]: Plugins for current stage + """ + stage_plugins = [] + for plugin in plugins: + plugin = plugin.copy() + stages = plugin.pop('stages', None) + assert stages is None or len(stages) == self.num_stages + # whether to insert plugin into current stage + if stages is None or stages[stage_idx]: + stage_plugins.append(plugin) + + return stage_plugins + + def make_res_layer(self, **kwargs): + """Pack all blocks in a stage into a ``ResLayer``.""" + return ResLayer(**kwargs) + + @property + def norm1(self): + """nn.Module: the normalization layer named "norm1" """ + return getattr(self, self.norm1_name) + + def _make_stem_layer(self, in_channels, stem_channels): + """Make stem layer for ResNet.""" + if self.deep_stem: + self.stem = nn.Sequential( + build_conv_layer( + self.conv_cfg, + in_channels, + stem_channels // 2, + kernel_size=3, + stride=2, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, stem_channels // 2)[1], + nn.ReLU(inplace=True), + build_conv_layer( + self.conv_cfg, + stem_channels // 2, + stem_channels // 2, + kernel_size=3, + stride=1, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, stem_channels // 2)[1], + nn.ReLU(inplace=True), + build_conv_layer( + self.conv_cfg, + stem_channels // 2, + stem_channels, + kernel_size=3, + stride=1, + padding=1, + bias=False), + build_norm_layer(self.norm_cfg, stem_channels)[1], + nn.ReLU(inplace=True)) + else: + self.conv1 = build_conv_layer( + self.conv_cfg, + in_channels, + stem_channels, + kernel_size=7, + stride=2, + padding=3, + bias=False) + self.norm1_name, norm1 = build_norm_layer( + self.norm_cfg, stem_channels, postfix=1) + self.add_module(self.norm1_name, norm1) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + + def _freeze_stages(self): + """Freeze stages param and norm stats.""" + if self.frozen_stages >= 0: + if self.deep_stem: + self.stem.eval() + for param in self.stem.parameters(): + param.requires_grad = False + else: + self.norm1.eval() + for m in [self.conv1, self.norm1]: + for param in m.parameters(): + param.requires_grad = False + + for i in range(1, self.frozen_stages + 1): + m = getattr(self, f'layer{i}') + m.eval() + for param in m.parameters(): + param.requires_grad = False + + def forward(self, x): + """Forward function.""" + if self.deep_stem: + x = self.stem(x) + else: + x = self.conv1(x) + x = self.norm1(x) + x = self.relu(x) + x = self.maxpool(x) + outs = [] + for i, layer_name in enumerate(self.res_layers): + res_layer = getattr(self, layer_name) + x = res_layer(x) + if i in self.out_indices: + outs.append(x) + return tuple(outs) + + def train(self, mode=True): + """Convert the model into training mode while keep normalization layer + freezed.""" + super(ResNet, self).train(mode) + self._freeze_stages() + if mode and self.norm_eval: + for m in self.modules(): + # trick: eval have effect on BatchNorm only + if isinstance(m, _BatchNorm): + m.eval() + + +@BACKBONES.register_module() +class ResNetV1c(ResNet): + """ResNetV1c variant described in [1]_. + + Compared with default ResNet(ResNetV1b), ResNetV1c replaces the 7x7 conv in + the input stem with three 3x3 convs. For more details please refer to `Bag + of Tricks for Image Classification with Convolutional Neural Networks + `_. 
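`_freeze_stages` and `norm_eval` (re-applied on every `train()` call above) are the two fine-tuning knobs: the first stops gradients for the stem and the first `frozen_stages` res layers, the second keeps BatchNorm running statistics fixed everywhere. A typical fine-tuning setup, for illustration:

backbone = dict(
    type='ResNet',
    depth=50,
    frozen_stages=1,  # freeze stem + layer1
    norm_eval=True)   # BN layers stay in eval mode during training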
+ """ + + def __init__(self, **kwargs): + super(ResNetV1c, self).__init__( + deep_stem=True, avg_down=False, **kwargs) + + +@BACKBONES.register_module() +class ResNetV1d(ResNet): + """ResNetV1d variant described in [1]_. + + Compared with default ResNet(ResNetV1b), ResNetV1d replaces the 7x7 conv in + the input stem with three 3x3 convs. And in the downsampling block, a 2x2 + avg_pool with stride 2 is added before conv, whose stride is changed to 1. + """ + + def __init__(self, **kwargs): + super(ResNetV1d, self).__init__( + deep_stem=True, avg_down=True, **kwargs) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/resnext.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/resnext.py new file mode 100644 index 0000000..805c27b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/resnext.py @@ -0,0 +1,150 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math + +from mmcv.cnn import build_conv_layer, build_norm_layer + +from ..builder import BACKBONES +from ..utils import ResLayer +from .resnet import Bottleneck as _Bottleneck +from .resnet import ResNet + + +class Bottleneck(_Bottleneck): + """Bottleneck block for ResNeXt. + + If style is "pytorch", the stride-two layer is the 3x3 conv layer, if it is + "caffe", the stride-two layer is the first 1x1 conv layer. + """ + + def __init__(self, + inplanes, + planes, + groups=1, + base_width=4, + base_channels=64, + **kwargs): + super(Bottleneck, self).__init__(inplanes, planes, **kwargs) + + if groups == 1: + width = self.planes + else: + width = math.floor(self.planes * + (base_width / base_channels)) * groups + + self.norm1_name, norm1 = build_norm_layer( + self.norm_cfg, width, postfix=1) + self.norm2_name, norm2 = build_norm_layer( + self.norm_cfg, width, postfix=2) + self.norm3_name, norm3 = build_norm_layer( + self.norm_cfg, self.planes * self.expansion, postfix=3) + + self.conv1 = build_conv_layer( + self.conv_cfg, + self.inplanes, + width, + kernel_size=1, + stride=self.conv1_stride, + bias=False) + self.add_module(self.norm1_name, norm1) + fallback_on_stride = False + self.with_modulated_dcn = False + if self.with_dcn: + fallback_on_stride = self.dcn.pop('fallback_on_stride', False) + if not self.with_dcn or fallback_on_stride: + self.conv2 = build_conv_layer( + self.conv_cfg, + width, + width, + kernel_size=3, + stride=self.conv2_stride, + padding=self.dilation, + dilation=self.dilation, + groups=groups, + bias=False) + else: + assert self.conv_cfg is None, 'conv_cfg must be None for DCN' + self.conv2 = build_conv_layer( + self.dcn, + width, + width, + kernel_size=3, + stride=self.conv2_stride, + padding=self.dilation, + dilation=self.dilation, + groups=groups, + bias=False) + + self.add_module(self.norm2_name, norm2) + self.conv3 = build_conv_layer( + self.conv_cfg, + width, + self.planes * self.expansion, + kernel_size=1, + bias=False) + self.add_module(self.norm3_name, norm3) + + +@BACKBONES.register_module() +class ResNeXt(ResNet): + """ResNeXt backbone. + + This backbone is the implementation of `Aggregated + Residual Transformations for Deep Neural + Networks `_. + + Args: + depth (int): Depth of resnet, from {18, 34, 50, 101, 152}. + in_channels (int): Number of input image channels. Normally 3. + num_stages (int): Resnet stages, normally 4. + groups (int): Group of resnext. + base_width (int): Base width of resnext. + strides (Sequence[int]): Strides of the first block of each stage. 
+ dilations (Sequence[int]): Dilation of each stage. + out_indices (Sequence[int]): Output from which stages. + style (str): `pytorch` or `caffe`. If set to "pytorch", the stride-two + layer is the 3x3 conv layer, otherwise the stride-two layer is + the first 1x1 conv layer. + frozen_stages (int): Stages to be frozen (all param fixed). -1 means + not freezing any parameters. + norm_cfg (dict): dictionary to construct and config norm layer. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. + zero_init_residual (bool): whether to use zero init for last norm layer + in resblocks to let them behave as identity. + + Example: + >>> from mmseg.models import ResNeXt + >>> import torch + >>> self = ResNeXt(depth=50) + >>> self.eval() + >>> inputs = torch.rand(1, 3, 32, 32) + >>> level_outputs = self.forward(inputs) + >>> for level_out in level_outputs: + ... print(tuple(level_out.shape)) + (1, 256, 8, 8) + (1, 512, 4, 4) + (1, 1024, 2, 2) + (1, 2048, 1, 1) + """ + + arch_settings = { + 50: (Bottleneck, (3, 4, 6, 3)), + 101: (Bottleneck, (3, 4, 23, 3)), + 152: (Bottleneck, (3, 8, 36, 3)) + } + + def __init__(self, groups=1, base_width=4, **kwargs): + self.groups = groups + self.base_width = base_width + super(ResNeXt, self).__init__(**kwargs) + + def make_res_layer(self, **kwargs): + """Pack all blocks in a stage into a ``ResLayer``""" + return ResLayer( + groups=self.groups, + base_width=self.base_width, + base_channels=self.base_channels, + **kwargs) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/stdc.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/stdc.py new file mode 100644 index 0000000..04f2f7a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/stdc.py @@ -0,0 +1,422 @@ +# Copyright (c) OpenMMLab. All rights reserved. +"""Modified from https://github.com/MichaelFan01/STDC-Seg.""" +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import ConvModule +from mmcv.runner.base_module import BaseModule, ModuleList, Sequential + +from mmseg.ops import resize +from ..builder import BACKBONES, build_backbone +from .bisenetv1 import AttentionRefinementModule + + +class STDCModule(BaseModule): + """STDCModule. + + Args: + in_channels (int): The number of input channels. + out_channels (int): The number of output channels before scaling. + stride (int): The number of stride for the first conv layer. + norm_cfg (dict): Config dict for normalization layer. Default: None. + act_cfg (dict): The activation config for conv layers. + num_convs (int): Numbers of conv layers. + fusion_type (str): Type of fusion operation. Default: 'add'. + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. 
+ """ + + def __init__(self, + in_channels, + out_channels, + stride, + norm_cfg=None, + act_cfg=None, + num_convs=4, + fusion_type='add', + init_cfg=None): + super(STDCModule, self).__init__(init_cfg=init_cfg) + assert num_convs > 1 + assert fusion_type in ['add', 'cat'] + self.stride = stride + self.with_downsample = True if self.stride == 2 else False + self.fusion_type = fusion_type + + self.layers = ModuleList() + conv_0 = ConvModule( + in_channels, out_channels // 2, kernel_size=1, norm_cfg=norm_cfg) + + if self.with_downsample: + self.downsample = ConvModule( + out_channels // 2, + out_channels // 2, + kernel_size=3, + stride=2, + padding=1, + groups=out_channels // 2, + norm_cfg=norm_cfg, + act_cfg=None) + + if self.fusion_type == 'add': + self.layers.append(nn.Sequential(conv_0, self.downsample)) + self.skip = Sequential( + ConvModule( + in_channels, + in_channels, + kernel_size=3, + stride=2, + padding=1, + groups=in_channels, + norm_cfg=norm_cfg, + act_cfg=None), + ConvModule( + in_channels, + out_channels, + 1, + norm_cfg=norm_cfg, + act_cfg=None)) + else: + self.layers.append(conv_0) + self.skip = nn.AvgPool2d(kernel_size=3, stride=2, padding=1) + else: + self.layers.append(conv_0) + + for i in range(1, num_convs): + out_factor = 2**(i + 1) if i != num_convs - 1 else 2**i + self.layers.append( + ConvModule( + out_channels // 2**i, + out_channels // out_factor, + kernel_size=3, + stride=1, + padding=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + + def forward(self, inputs): + if self.fusion_type == 'add': + out = self.forward_add(inputs) + else: + out = self.forward_cat(inputs) + return out + + def forward_add(self, inputs): + layer_outputs = [] + x = inputs.clone() + for layer in self.layers: + x = layer(x) + layer_outputs.append(x) + if self.with_downsample: + inputs = self.skip(inputs) + + return torch.cat(layer_outputs, dim=1) + inputs + + def forward_cat(self, inputs): + x0 = self.layers[0](inputs) + layer_outputs = [x0] + for i, layer in enumerate(self.layers[1:]): + if i == 0: + if self.with_downsample: + x = layer(self.downsample(x0)) + else: + x = layer(x0) + else: + x = layer(x) + layer_outputs.append(x) + if self.with_downsample: + layer_outputs[0] = self.skip(x0) + return torch.cat(layer_outputs, dim=1) + + +class FeatureFusionModule(BaseModule): + """Feature Fusion Module. This module is different from FeatureFusionModule + in BiSeNetV1. It uses two ConvModules in `self.attention` whose inter + channel number is calculated by given `scale_factor`, while + FeatureFusionModule in BiSeNetV1 only uses one ConvModule in + `self.conv_atten`. + + Args: + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + scale_factor (int): The number of channel scale factor. + Default: 4. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): The activation config for conv layers. + Default: dict(type='ReLU'). + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. 
+ """ + + def __init__(self, + in_channels, + out_channels, + scale_factor=4, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + init_cfg=None): + super(FeatureFusionModule, self).__init__(init_cfg=init_cfg) + channels = out_channels // scale_factor + self.conv0 = ConvModule( + in_channels, out_channels, 1, norm_cfg=norm_cfg, act_cfg=act_cfg) + self.attention = nn.Sequential( + nn.AdaptiveAvgPool2d((1, 1)), + ConvModule( + out_channels, + channels, + 1, + norm_cfg=None, + bias=False, + act_cfg=act_cfg), + ConvModule( + channels, + out_channels, + 1, + norm_cfg=None, + bias=False, + act_cfg=None), nn.Sigmoid()) + + def forward(self, spatial_inputs, context_inputs): + inputs = torch.cat([spatial_inputs, context_inputs], dim=1) + x = self.conv0(inputs) + attn = self.attention(x) + x_attn = x * attn + return x_attn + x + + +@BACKBONES.register_module() +class STDCNet(BaseModule): + """This backbone is the implementation of `Rethinking BiSeNet For Real-time + Semantic Segmentation `_. + + Args: + stdc_type (int): The type of backbone structure, + `STDCNet1` and`STDCNet2` denotes two main backbones in paper, + whose FLOPs is 813M and 1446M, respectively. + in_channels (int): The num of input_channels. + channels (tuple[int]): The output channels for each stage. + bottleneck_type (str): The type of STDC Module type, the value must + be 'add' or 'cat'. + norm_cfg (dict): Config dict for normalization layer. + act_cfg (dict): The activation config for conv layers. + num_convs (int): Numbers of conv layer at each STDC Module. + Default: 4. + with_final_conv (bool): Whether add a conv layer at the Module output. + Default: True. + pretrained (str, optional): Model pretrained path. Default: None. + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + + Example: + >>> import torch + >>> stdc_type = 'STDCNet1' + >>> in_channels = 3 + >>> channels = (32, 64, 256, 512, 1024) + >>> bottleneck_type = 'cat' + >>> inputs = torch.rand(1, 3, 1024, 2048) + >>> self = STDCNet(stdc_type, in_channels, + ... channels, bottleneck_type).eval() + >>> outputs = self.forward(inputs) + >>> for i in range(len(outputs)): + ... print(f'outputs[{i}].shape = {outputs[i].shape}') + outputs[0].shape = torch.Size([1, 256, 128, 256]) + outputs[1].shape = torch.Size([1, 512, 64, 128]) + outputs[2].shape = torch.Size([1, 1024, 32, 64]) + """ + + arch_settings = { + 'STDCNet1': [(2, 1), (2, 1), (2, 1)], + 'STDCNet2': [(2, 1, 1, 1), (2, 1, 1, 1, 1), (2, 1, 1)] + } + + def __init__(self, + stdc_type, + in_channels, + channels, + bottleneck_type, + norm_cfg, + act_cfg, + num_convs=4, + with_final_conv=False, + pretrained=None, + init_cfg=None): + super(STDCNet, self).__init__(init_cfg=init_cfg) + assert stdc_type in self.arch_settings, \ + f'invalid structure {stdc_type} for STDCNet.' + assert bottleneck_type in ['add', 'cat'],\ + f'bottleneck_type must be `add` or `cat`, got {bottleneck_type}' + + assert len(channels) == 5,\ + f'invalid channels length {len(channels)} for STDCNet.' 
+ + self.in_channels = in_channels + self.channels = channels + self.stage_strides = self.arch_settings[stdc_type] + self.pretrained = pretrained + self.num_convs = num_convs + self.with_final_conv = with_final_conv + + self.stages = ModuleList([ + ConvModule( + self.in_channels, + self.channels[0], + kernel_size=3, + stride=2, + padding=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg), + ConvModule( + self.channels[0], + self.channels[1], + kernel_size=3, + stride=2, + padding=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + ]) + # `self.num_shallow_features` is the number of shallow modules in + # `STDCNet`, which are noted as `Stage1` and `Stage2` in the original + # paper. Neither is used by the following modules (Attention + # Refinement Module and Feature Fusion Module), so they are cut from + # `outs`. Please refer to Figure 4 of the original paper for details. + self.num_shallow_features = len(self.stages) + + for strides in self.stage_strides: + idx = len(self.stages) - 1 + self.stages.append( + self._make_stage(self.channels[idx], self.channels[idx + 1], + strides, norm_cfg, act_cfg, bottleneck_type)) + # After appending, `self.stages` is a ModuleList including several + # shallow modules and STDCModules. + # (len(self.stages) == + # self.num_shallow_features + len(self.stage_strides)) + if self.with_final_conv: + self.final_conv = ConvModule( + self.channels[-1], + max(1024, self.channels[-1]), + 1, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + def _make_stage(self, in_channels, out_channels, strides, norm_cfg, + act_cfg, bottleneck_type): + layers = [] + for i, stride in enumerate(strides): + layers.append( + STDCModule( + in_channels if i == 0 else out_channels, + out_channels, + stride, + norm_cfg, + act_cfg, + num_convs=self.num_convs, + fusion_type=bottleneck_type)) + return Sequential(*layers) + + def forward(self, x): + outs = [] + for stage in self.stages: + x = stage(x) + outs.append(x) + if self.with_final_conv: + outs[-1] = self.final_conv(outs[-1]) + outs = outs[self.num_shallow_features:] + return tuple(outs) + + +@BACKBONES.register_module() +class STDCContextPathNet(BaseModule): + """STDCNet with Context Path. The `outs` below is a list of three feature + maps from deep to shallow, whose heights and widths go from small to big. + The biggest feature map of `outs` is output to `STDCHead`, where the + Detail Loss is calculated against the Detail Ground-truth. The other two + feature maps are fed to the Attention Refinement Modules. Besides, the + biggest feature map of `outs` and the last output of the Attention + Refinement Modules are concatenated for the Feature Fusion Module. Then, + this fused feature map `feat_fuse` is output to `decode_head`. For more + details, please refer to Figure 4 of the original paper. + + Args: + backbone_cfg (dict): Config dict for stdc backbone. + last_in_channels (tuple(int)): The number of channels of the last + two feature maps from the stdc backbone. Default: (1024, 512). + out_channels (int): The channels of output feature maps. + Default: 128. + ffm_cfg (dict): Config dict for Feature Fusion Module. Default: + `dict(in_channels=512, out_channels=256, scale_factor=4)`. + upsample_mode (str): Algorithm used for upsampling: + ``'nearest'`` | ``'linear'`` | ``'bilinear'`` | ``'bicubic'`` | + ``'trilinear'``. Default: ``'nearest'``. + align_corners (bool | None): align_corners argument of F.interpolate. + It must be `None` if upsample_mode is ``'nearest'``. Default: None. + norm_cfg (dict): Config dict for normalization layer. 
+ Default: dict(type='BN'). + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + + Return: + outputs (tuple): The tuple of list of output feature map for + auxiliary heads and decoder head. + """ + + def __init__(self, + backbone_cfg, + last_in_channels=(1024, 512), + out_channels=128, + ffm_cfg=dict( + in_channels=512, out_channels=256, scale_factor=4), + upsample_mode='nearest', + align_corners=None, + norm_cfg=dict(type='BN'), + init_cfg=None): + super(STDCContextPathNet, self).__init__(init_cfg=init_cfg) + self.backbone = build_backbone(backbone_cfg) + self.arms = ModuleList() + self.convs = ModuleList() + for channels in last_in_channels: + self.arms.append(AttentionRefinementModule(channels, out_channels)) + self.convs.append( + ConvModule( + out_channels, + out_channels, + 3, + padding=1, + norm_cfg=norm_cfg)) + self.conv_avg = ConvModule( + last_in_channels[0], out_channels, 1, norm_cfg=norm_cfg) + + self.ffm = FeatureFusionModule(**ffm_cfg) + + self.upsample_mode = upsample_mode + self.align_corners = align_corners + + def forward(self, x): + outs = list(self.backbone(x)) + avg = F.adaptive_avg_pool2d(outs[-1], 1) + avg_feat = self.conv_avg(avg) + + feature_up = resize( + avg_feat, + size=outs[-1].shape[2:], + mode=self.upsample_mode, + align_corners=self.align_corners) + arms_out = [] + for i in range(len(self.arms)): + x_arm = self.arms[i](outs[len(outs) - 1 - i]) + feature_up + feature_up = resize( + x_arm, + size=outs[len(outs) - 1 - i - 1].shape[2:], + mode=self.upsample_mode, + align_corners=self.align_corners) + feature_up = self.convs[i](feature_up) + arms_out.append(feature_up) + + feat_fuse = self.ffm(outs[0], arms_out[1]) + + # The `outputs` has four feature maps. + # `outs[0]` is outputted for `STDCHead` auxiliary head. + # Two feature maps of `arms_out` are outputted for auxiliary head. + # `feat_fuse` is outputted for decoder head. + outputs = [outs[0]] + list(arms_out) + [feat_fuse] + return tuple(outputs) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/swin.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/swin.py new file mode 100644 index 0000000..cbf1328 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/swin.py @@ -0,0 +1,756 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings +from collections import OrderedDict +from copy import deepcopy + +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.utils.checkpoint as cp +from mmcv.cnn import build_norm_layer +from mmcv.cnn.bricks.transformer import FFN, build_dropout +from mmcv.cnn.utils.weight_init import (constant_init, trunc_normal_, + trunc_normal_init) +from mmcv.runner import (BaseModule, CheckpointLoader, ModuleList, + load_state_dict) +from mmcv.utils import to_2tuple + +from ...utils import get_root_logger +from ..builder import BACKBONES +from ..utils.embed import PatchEmbed, PatchMerging + + +class WindowMSA(BaseModule): + """Window based multi-head self-attention (W-MSA) module with relative + position bias. + + Args: + embed_dims (int): Number of input channels. + num_heads (int): Number of attention heads. + window_size (tuple[int]): The height and width of the window. + qkv_bias (bool, optional): If True, add a learnable bias to q, k, v. + Default: True. + qk_scale (float | None, optional): Override default qk scale of + head_dim ** -0.5 if set. Default: None. 
+ attn_drop_rate (float, optional): Dropout ratio of attention weight. + Default: 0.0 + proj_drop_rate (float, optional): Dropout ratio of output. Default: 0. + init_cfg (dict | None, optional): The Config for initialization. + Default: None. + """ + + def __init__(self, + embed_dims, + num_heads, + window_size, + qkv_bias=True, + qk_scale=None, + attn_drop_rate=0., + proj_drop_rate=0., + init_cfg=None): + + super().__init__(init_cfg=init_cfg) + self.embed_dims = embed_dims + self.window_size = window_size # Wh, Ww + self.num_heads = num_heads + head_embed_dims = embed_dims // num_heads + self.scale = qk_scale or head_embed_dims**-0.5 + + # define a parameter table of relative position bias + self.relative_position_bias_table = nn.Parameter( + torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1), + num_heads)) # 2*Wh-1 * 2*Ww-1, nH + + # About 2x faster than original impl + Wh, Ww = self.window_size + rel_index_coords = self.double_step_seq(2 * Ww - 1, Wh, 1, Ww) + rel_position_index = rel_index_coords + rel_index_coords.T + rel_position_index = rel_position_index.flip(1).contiguous() + self.register_buffer('relative_position_index', rel_position_index) + + self.qkv = nn.Linear(embed_dims, embed_dims * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop_rate) + self.proj = nn.Linear(embed_dims, embed_dims) + self.proj_drop = nn.Dropout(proj_drop_rate) + + self.softmax = nn.Softmax(dim=-1) + + def init_weights(self): + trunc_normal_(self.relative_position_bias_table, std=0.02) + + def forward(self, x, mask=None): + """ + Args: + + x (tensor): input features with shape of (num_windows*B, N, C) + mask (tensor | None, Optional): mask with shape of (num_windows, + Wh*Ww, Wh*Ww), value should be between (-inf, 0]. + """ + B, N, C = x.shape + qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, + C // self.num_heads).permute(2, 0, 3, 1, 4) + # make torchscript happy (cannot use tensor as tuple) + q, k, v = qkv[0], qkv[1], qkv[2] + + q = q * self.scale + attn = (q @ k.transpose(-2, -1)) + + relative_position_bias = self.relative_position_bias_table[ + self.relative_position_index.view(-1)].view( + self.window_size[0] * self.window_size[1], + self.window_size[0] * self.window_size[1], + -1) # Wh*Ww,Wh*Ww,nH + relative_position_bias = relative_position_bias.permute( + 2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww + attn = attn + relative_position_bias.unsqueeze(0) + + if mask is not None: + nW = mask.shape[0] + attn = attn.view(B // nW, nW, self.num_heads, N, + N) + mask.unsqueeze(1).unsqueeze(0) + attn = attn.view(-1, self.num_heads, N, N) + attn = self.softmax(attn) + + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B, N, C) + x = self.proj(x) + x = self.proj_drop(x) + return x + + @staticmethod + def double_step_seq(step1, len1, step2, len2): + seq1 = torch.arange(0, step1 * len1, step1) + seq2 = torch.arange(0, step2 * len2, step2) + return (seq1[:, None] + seq2[None, :]).reshape(1, -1) + + +class ShiftWindowMSA(BaseModule): + """Shifted Window Multihead Self-Attention Module. + + Args: + embed_dims (int): Number of input channels. + num_heads (int): Number of attention heads. + window_size (int): The height and width of the window. + shift_size (int, optional): The shift step of each window towards + right-bottom. If zero, act as regular window-msa. Defaults to 0. + qkv_bias (bool, optional): If True, add a learnable bias to q, k, v. + Default: True + qk_scale (float | None, optional): Override default qk scale of + head_dim ** -0.5 if set. Defaults: None. 
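The `double_step_seq` helper above is a compact way to build the (Wh*Ww, Wh*Ww) relative position index; the equivalent meshgrid construction from the reference Swin implementation makes the intent clearer. A sketch for comparison (`indexing='ij'` needs torch >= 1.10):

import torch

def relative_position_index(Wh, Ww):
    coords = torch.stack(torch.meshgrid(
        torch.arange(Wh), torch.arange(Ww), indexing='ij'))  # (2, Wh, Ww)
    flat = coords.flatten(1)                                 # (2, Wh*Ww)
    rel = (flat[:, :, None] - flat[:, None, :]).permute(1, 2, 0)  # (N, N, 2)
    rel[:, :, 0] += Wh - 1   # shift offsets to be >= 0
    rel[:, :, 1] += Ww - 1
    rel[:, :, 0] *= 2 * Ww - 1
    return rel.sum(-1)       # (N, N) lookup into the bias table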
+ attn_drop_rate (float, optional): Dropout ratio of attention weight. + Defaults: 0. + proj_drop_rate (float, optional): Dropout ratio of output. + Defaults: 0. + dropout_layer (dict, optional): The dropout_layer used before output. + Defaults: dict(type='DropPath', drop_prob=0.). + init_cfg (dict, optional): The extra config for initialization. + Default: None. + """ + + def __init__(self, + embed_dims, + num_heads, + window_size, + shift_size=0, + qkv_bias=True, + qk_scale=None, + attn_drop_rate=0, + proj_drop_rate=0, + dropout_layer=dict(type='DropPath', drop_prob=0.), + init_cfg=None): + super().__init__(init_cfg=init_cfg) + + self.window_size = window_size + self.shift_size = shift_size + assert 0 <= self.shift_size < self.window_size + + self.w_msa = WindowMSA( + embed_dims=embed_dims, + num_heads=num_heads, + window_size=to_2tuple(window_size), + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop_rate=attn_drop_rate, + proj_drop_rate=proj_drop_rate, + init_cfg=None) + + self.drop = build_dropout(dropout_layer) + + def forward(self, query, hw_shape): + B, L, C = query.shape + H, W = hw_shape + assert L == H * W, 'input feature has wrong size' + query = query.view(B, H, W, C) + + # pad feature maps to multiples of window size + pad_r = (self.window_size - W % self.window_size) % self.window_size + pad_b = (self.window_size - H % self.window_size) % self.window_size + query = F.pad(query, (0, 0, 0, pad_r, 0, pad_b)) + H_pad, W_pad = query.shape[1], query.shape[2] + + # cyclic shift + if self.shift_size > 0: + shifted_query = torch.roll( + query, + shifts=(-self.shift_size, -self.shift_size), + dims=(1, 2)) + + # calculate attention mask for SW-MSA + img_mask = torch.zeros((1, H_pad, W_pad, 1), device=query.device) + h_slices = (slice(0, -self.window_size), + slice(-self.window_size, + -self.shift_size), slice(-self.shift_size, None)) + w_slices = (slice(0, -self.window_size), + slice(-self.window_size, + -self.shift_size), slice(-self.shift_size, None)) + cnt = 0 + for h in h_slices: + for w in w_slices: + img_mask[:, h, w, :] = cnt + cnt += 1 + + # nW, window_size, window_size, 1 + mask_windows = self.window_partition(img_mask) + mask_windows = mask_windows.view( + -1, self.window_size * self.window_size) + attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) + attn_mask = attn_mask.masked_fill(attn_mask != 0, + float(-100.0)).masked_fill( + attn_mask == 0, float(0.0)) + else: + shifted_query = query + attn_mask = None + + # nW*B, window_size, window_size, C + query_windows = self.window_partition(shifted_query) + # nW*B, window_size*window_size, C + query_windows = query_windows.view(-1, self.window_size**2, C) + + # W-MSA/SW-MSA (nW*B, window_size*window_size, C) + attn_windows = self.w_msa(query_windows, mask=attn_mask) + + # merge windows + attn_windows = attn_windows.view(-1, self.window_size, + self.window_size, C) + + # B H' W' C + shifted_x = self.window_reverse(attn_windows, H_pad, W_pad) + # reverse cyclic shift + if self.shift_size > 0: + x = torch.roll( + shifted_x, + shifts=(self.shift_size, self.shift_size), + dims=(1, 2)) + else: + x = shifted_x + + if pad_r > 0 or pad_b: + x = x[:, :H, :W, :].contiguous() + + x = x.view(B, H * W, C) + + x = self.drop(x) + return x + + def window_reverse(self, windows, H, W): + """ + Args: + windows: (num_windows*B, window_size, window_size, C) + H (int): Height of image + W (int): Width of image + Returns: + x: (B, H, W, C) + """ + window_size = self.window_size + B = int(windows.shape[0] / (H * W / window_size / 
window_size)) + x = windows.view(B, H // window_size, W // window_size, window_size, + window_size, -1) + x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1) + return x + + def window_partition(self, x): + """ + Args: + x: (B, H, W, C) + Returns: + windows: (num_windows*B, window_size, window_size, C) + """ + B, H, W, C = x.shape + window_size = self.window_size + x = x.view(B, H // window_size, window_size, W // window_size, + window_size, C) + windows = x.permute(0, 1, 3, 2, 4, 5).contiguous() + windows = windows.view(-1, window_size, window_size, C) + return windows + + +class SwinBlock(BaseModule): + """" + Args: + embed_dims (int): The feature dimension. + num_heads (int): Parallel attention heads. + feedforward_channels (int): The hidden dimension for FFNs. + window_size (int, optional): The local window scale. Default: 7. + shift (bool, optional): whether to shift window or not. Default False. + qkv_bias (bool, optional): enable bias for qkv if True. Default: True. + qk_scale (float | None, optional): Override default qk scale of + head_dim ** -0.5 if set. Default: None. + drop_rate (float, optional): Dropout rate. Default: 0. + attn_drop_rate (float, optional): Attention dropout rate. Default: 0. + drop_path_rate (float, optional): Stochastic depth rate. Default: 0. + act_cfg (dict, optional): The config dict of activation function. + Default: dict(type='GELU'). + norm_cfg (dict, optional): The config dict of normalization. + Default: dict(type='LN'). + with_cp (bool, optional): Use checkpoint or not. Using checkpoint + will save some memory while slowing down the training speed. + Default: False. + init_cfg (dict | list | None, optional): The init config. + Default: None. + """ + + def __init__(self, + embed_dims, + num_heads, + feedforward_channels, + window_size=7, + shift=False, + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='LN'), + with_cp=False, + init_cfg=None): + + super(SwinBlock, self).__init__(init_cfg=init_cfg) + + self.with_cp = with_cp + + self.norm1 = build_norm_layer(norm_cfg, embed_dims)[1] + self.attn = ShiftWindowMSA( + embed_dims=embed_dims, + num_heads=num_heads, + window_size=window_size, + shift_size=window_size // 2 if shift else 0, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + attn_drop_rate=attn_drop_rate, + proj_drop_rate=drop_rate, + dropout_layer=dict(type='DropPath', drop_prob=drop_path_rate), + init_cfg=None) + + self.norm2 = build_norm_layer(norm_cfg, embed_dims)[1] + self.ffn = FFN( + embed_dims=embed_dims, + feedforward_channels=feedforward_channels, + num_fcs=2, + ffn_drop=drop_rate, + dropout_layer=dict(type='DropPath', drop_prob=drop_path_rate), + act_cfg=act_cfg, + add_identity=True, + init_cfg=None) + + def forward(self, x, hw_shape): + + def _inner_forward(x): + identity = x + x = self.norm1(x) + x = self.attn(x, hw_shape) + + x = x + identity + + identity = x + x = self.norm2(x) + x = self.ffn(x, identity=identity) + + return x + + if self.with_cp and x.requires_grad: + x = cp.checkpoint(_inner_forward, x) + else: + x = _inner_forward(x) + + return x + + +class SwinBlockSequence(BaseModule): + """Implements one stage in Swin Transformer. + + Args: + embed_dims (int): The feature dimension. + num_heads (int): Parallel attention heads. + feedforward_channels (int): The hidden dimension for FFNs. + depth (int): The number of blocks in this stage. + window_size (int, optional): The local window scale. Default: 7. 
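`window_partition` and `window_reverse` above are exact inverses whenever H and W are multiples of `window_size`, which the padding in `forward` guarantees. A quick sanity check, assuming `m` is any instance exposing both methods with window_size=7:

import torch

x = torch.randn(2, 14, 14, 32)     # (B, H, W, C), 14 divisible by 7
windows = m.window_partition(x)    # (8, 7, 7, 32): 4 windows per image
assert torch.equal(m.window_reverse(windows, 14, 14), x)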
+ qkv_bias (bool, optional): enable bias for qkv if True. Default: True. + qk_scale (float | None, optional): Override default qk scale of + head_dim ** -0.5 if set. Default: None. + drop_rate (float, optional): Dropout rate. Default: 0. + attn_drop_rate (float, optional): Attention dropout rate. Default: 0. + drop_path_rate (float | list[float], optional): Stochastic depth + rate. Default: 0. + downsample (BaseModule | None, optional): The downsample operation + module. Default: None. + act_cfg (dict, optional): The config dict of activation function. + Default: dict(type='GELU'). + norm_cfg (dict, optional): The config dict of normalization. + Default: dict(type='LN'). + with_cp (bool, optional): Use checkpoint or not. Using checkpoint + will save some memory while slowing down the training speed. + Default: False. + init_cfg (dict | list | None, optional): The init config. + Default: None. + """ + + def __init__(self, + embed_dims, + num_heads, + feedforward_channels, + depth, + window_size=7, + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + downsample=None, + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='LN'), + with_cp=False, + init_cfg=None): + super().__init__(init_cfg=init_cfg) + + if isinstance(drop_path_rate, list): + drop_path_rates = drop_path_rate + assert len(drop_path_rates) == depth + else: + drop_path_rates = [deepcopy(drop_path_rate) for _ in range(depth)] + + self.blocks = ModuleList() + for i in range(depth): + block = SwinBlock( + embed_dims=embed_dims, + num_heads=num_heads, + feedforward_channels=feedforward_channels, + window_size=window_size, + shift=False if i % 2 == 0 else True, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop_rate=drop_rate, + attn_drop_rate=attn_drop_rate, + drop_path_rate=drop_path_rates[i], + act_cfg=act_cfg, + norm_cfg=norm_cfg, + with_cp=with_cp, + init_cfg=None) + self.blocks.append(block) + + self.downsample = downsample + + def forward(self, x, hw_shape): + for block in self.blocks: + x = block(x, hw_shape) + + if self.downsample: + x_down, down_hw_shape = self.downsample(x, hw_shape) + return x_down, down_hw_shape, x, hw_shape + else: + return x, hw_shape, x, hw_shape + + +@BACKBONES.register_module() +class SwinTransformer(BaseModule): + """Swin Transformer backbone. + + This backbone is the implementation of `Swin Transformer: + Hierarchical Vision Transformer using Shifted + Windows `_. + Inspiration from https://github.com/microsoft/Swin-Transformer. + + Args: + pretrain_img_size (int | tuple[int]): The size of input image when + pretrain. Defaults: 224. + in_channels (int): The num of input channels. + Defaults: 3. + embed_dims (int): The feature dimension. Default: 96. + patch_size (int | tuple[int]): Patch size. Default: 4. + window_size (int): Window size. Default: 7. + mlp_ratio (int | float): Ratio of mlp hidden dim to embedding dim. + Default: 4. + depths (tuple[int]): Depths of each Swin Transformer stage. + Default: (2, 2, 6, 2). + num_heads (tuple[int]): Parallel attention heads of each Swin + Transformer stage. Default: (3, 6, 12, 24). + strides (tuple[int]): The patch merging or patch embedding stride of + each Swin Transformer stage. (In swin, we set kernel size equal to + stride.) Default: (4, 2, 2, 2). + out_indices (tuple[int]): Output from which stages. + Default: (0, 1, 2, 3). + qkv_bias (bool, optional): If True, add a learnable bias to query, key, + value. Default: True + qk_scale (float | None, optional): Override default qk scale of + head_dim ** -0.5 if set. 
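The `drop_path_rate` list each `SwinBlockSequence` consumes is one slice of a single linear stochastic-depth schedule over all blocks (built with `torch.linspace` in the `SwinTransformer` constructor below). Spelled out:

import torch

depths, drop_path_rate = (2, 2, 6, 2), 0.1
dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]
per_stage = [dpr[sum(depths[:i]):sum(depths[:i + 1])]
             for i in range(len(depths))]
# per_stage[0] starts at 0.0 and per_stage[-1] ends at 0.1:
# deeper blocks are dropped more aggressively.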
Default: None. + patch_norm (bool): If add a norm layer for patch embed and patch + merging. Default: True. + drop_rate (float): Dropout rate. Defaults: 0. + attn_drop_rate (float): Attention dropout rate. Default: 0. + drop_path_rate (float): Stochastic depth rate. Defaults: 0.1. + use_abs_pos_embed (bool): If True, add absolute position embedding to + the patch embedding. Defaults: False. + act_cfg (dict): Config dict for activation layer. + Default: dict(type='LN'). + norm_cfg (dict): Config dict for normalization layer at + output of backone. Defaults: dict(type='LN'). + with_cp (bool, optional): Use checkpoint or not. Using checkpoint + will save some memory while slowing down the training speed. + Default: False. + pretrained (str, optional): model pretrained path. Default: None. + frozen_stages (int): Stages to be frozen (stop grad and set eval mode). + -1 means not freezing any parameters. + init_cfg (dict, optional): The Config for initialization. + Defaults to None. + """ + + def __init__(self, + pretrain_img_size=224, + in_channels=3, + embed_dims=96, + patch_size=4, + window_size=7, + mlp_ratio=4, + depths=(2, 2, 6, 2), + num_heads=(3, 6, 12, 24), + strides=(4, 2, 2, 2), + out_indices=(0, 1, 2, 3), + qkv_bias=True, + qk_scale=None, + patch_norm=True, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.1, + use_abs_pos_embed=False, + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='LN'), + with_cp=False, + pretrained=None, + frozen_stages=-1, + init_cfg=None): + self.frozen_stages = frozen_stages + + if isinstance(pretrain_img_size, int): + pretrain_img_size = to_2tuple(pretrain_img_size) + elif isinstance(pretrain_img_size, tuple): + if len(pretrain_img_size) == 1: + pretrain_img_size = to_2tuple(pretrain_img_size[0]) + assert len(pretrain_img_size) == 2, \ + f'The size of image should have length 1 or 2, ' \ + f'but got {len(pretrain_img_size)}' + + assert not (init_cfg and pretrained), \ + 'init_cfg and pretrained cannot be specified at the same time' + if isinstance(pretrained, str): + warnings.warn('DeprecationWarning: pretrained is deprecated, ' + 'please use "init_cfg" instead') + init_cfg = dict(type='Pretrained', checkpoint=pretrained) + elif pretrained is None: + init_cfg = init_cfg + else: + raise TypeError('pretrained must be a str or None') + + super(SwinTransformer, self).__init__(init_cfg=init_cfg) + + num_layers = len(depths) + self.out_indices = out_indices + self.use_abs_pos_embed = use_abs_pos_embed + + assert strides[0] == patch_size, 'Use non-overlapping patch embed.' 
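With the non-overlapping patch embedding asserted above, the token grid is just the pretrain image size divided by the patch size; this is exactly how `num_patches` for the optional absolute position embedding is derived below:

from mmcv.utils import to_2tuple

pretrain_img_size, patch_size = to_2tuple(224), 4
patch_row = pretrain_img_size[0] // patch_size   # 56
patch_col = pretrain_img_size[1] // patch_size   # 56
num_patches = patch_row * patch_col              # 3136 tokens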
+ + self.patch_embed = PatchEmbed( + in_channels=in_channels, + embed_dims=embed_dims, + conv_type='Conv2d', + kernel_size=patch_size, + stride=strides[0], + padding='corner', + norm_cfg=norm_cfg if patch_norm else None, + init_cfg=None) + + if self.use_abs_pos_embed: + patch_row = pretrain_img_size[0] // patch_size + patch_col = pretrain_img_size[1] // patch_size + num_patches = patch_row * patch_col + self.absolute_pos_embed = nn.Parameter( + torch.zeros((1, num_patches, embed_dims))) + + self.drop_after_pos = nn.Dropout(p=drop_rate) + + # set stochastic depth decay rule + total_depth = sum(depths) + dpr = [ + x.item() for x in torch.linspace(0, drop_path_rate, total_depth) + ] + + self.stages = ModuleList() + in_channels = embed_dims + for i in range(num_layers): + if i < num_layers - 1: + downsample = PatchMerging( + in_channels=in_channels, + out_channels=2 * in_channels, + stride=strides[i + 1], + norm_cfg=norm_cfg if patch_norm else None, + init_cfg=None) + else: + downsample = None + + stage = SwinBlockSequence( + embed_dims=in_channels, + num_heads=num_heads[i], + feedforward_channels=int(mlp_ratio * in_channels), + depth=depths[i], + window_size=window_size, + qkv_bias=qkv_bias, + qk_scale=qk_scale, + drop_rate=drop_rate, + attn_drop_rate=attn_drop_rate, + drop_path_rate=dpr[sum(depths[:i]):sum(depths[:i + 1])], + downsample=downsample, + act_cfg=act_cfg, + norm_cfg=norm_cfg, + with_cp=with_cp, + init_cfg=None) + self.stages.append(stage) + if downsample: + in_channels = downsample.out_channels + + self.num_features = [int(embed_dims * 2**i) for i in range(num_layers)] + # Add a norm layer for each output + for i in out_indices: + layer = build_norm_layer(norm_cfg, self.num_features[i])[1] + layer_name = f'norm{i}' + self.add_module(layer_name, layer) + + def train(self, mode=True): + """Convert the model into training mode while keep layers freezed.""" + super(SwinTransformer, self).train(mode) + self._freeze_stages() + + def _freeze_stages(self): + if self.frozen_stages >= 0: + self.patch_embed.eval() + for param in self.patch_embed.parameters(): + param.requires_grad = False + if self.use_abs_pos_embed: + self.absolute_pos_embed.requires_grad = False + self.drop_after_pos.eval() + + for i in range(1, self.frozen_stages + 1): + + if (i - 1) in self.out_indices: + norm_layer = getattr(self, f'norm{i-1}') + norm_layer.eval() + for param in norm_layer.parameters(): + param.requires_grad = False + + m = self.stages[i - 1] + m.eval() + for param in m.parameters(): + param.requires_grad = False + + def init_weights(self): + logger = get_root_logger() + if self.init_cfg is None: + logger.warn(f'No pre-trained weights for ' + f'{self.__class__.__name__}, ' + f'training start from scratch') + if self.use_abs_pos_embed: + trunc_normal_(self.absolute_pos_embed, std=0.02) + for m in self.modules(): + if isinstance(m, nn.Linear): + trunc_normal_init(m, std=.02, bias=0.) + elif isinstance(m, nn.LayerNorm): + constant_init(m, val=1.0, bias=0.) 
+ else: + assert 'checkpoint' in self.init_cfg, f'Only support ' \ + f'specify `Pretrained` in ' \ + f'`init_cfg` in ' \ + f'{self.__class__.__name__} ' + ckpt = CheckpointLoader.load_checkpoint( + self.init_cfg['checkpoint'], logger=logger, map_location='cpu') + if 'state_dict' in ckpt: + _state_dict = ckpt['state_dict'] + elif 'model' in ckpt: + _state_dict = ckpt['model'] + else: + _state_dict = ckpt + + state_dict = OrderedDict() + for k, v in _state_dict.items(): + if k.startswith('backbone.'): + state_dict[k[9:]] = v + else: + state_dict[k] = v + + # strip prefix of state_dict + if list(state_dict.keys())[0].startswith('module.'): + state_dict = {k[7:]: v for k, v in state_dict.items()} + + # reshape absolute position embedding + if state_dict.get('absolute_pos_embed') is not None: + absolute_pos_embed = state_dict['absolute_pos_embed'] + N1, L, C1 = absolute_pos_embed.size() + N2, C2, H, W = self.absolute_pos_embed.size() + if N1 != N2 or C1 != C2 or L != H * W: + logger.warning('Error in loading absolute_pos_embed, pass') + else: + state_dict['absolute_pos_embed'] = absolute_pos_embed.view( + N2, H, W, C2).permute(0, 3, 1, 2).contiguous() + + # interpolate position bias table if needed + relative_position_bias_table_keys = [ + k for k in state_dict.keys() + if 'relative_position_bias_table' in k + ] + for table_key in relative_position_bias_table_keys: + table_pretrained = state_dict[table_key] + table_current = self.state_dict()[table_key] + L1, nH1 = table_pretrained.size() + L2, nH2 = table_current.size() + if nH1 != nH2: + logger.warning(f'Error in loading {table_key}, pass') + elif L1 != L2: + S1 = int(L1**0.5) + S2 = int(L2**0.5) + table_pretrained_resized = F.interpolate( + table_pretrained.permute(1, 0).reshape(1, nH1, S1, S1), + size=(S2, S2), + mode='bicubic') + state_dict[table_key] = table_pretrained_resized.view( + nH2, L2).permute(1, 0).contiguous() + + # load state_dict + load_state_dict(self, state_dict, strict=False, logger=logger) + + def forward(self, x): + x, hw_shape = self.patch_embed(x) + + if self.use_abs_pos_embed: + x = x + self.absolute_pos_embed + x = self.drop_after_pos(x) + + outs = [] + for i, stage in enumerate(self.stages): + x, hw_shape, out, out_hw_shape = stage(x, hw_shape) + if i in self.out_indices: + norm_layer = getattr(self, f'norm{i}') + out = norm_layer(out) + out = out.view(-1, *out_hw_shape, + self.num_features[i]).permute(0, 3, 1, + 2).contiguous() + outs.append(out) + + return outs diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/timm_backbone.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/timm_backbone.py new file mode 100644 index 0000000..01b29fc --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/timm_backbone.py @@ -0,0 +1,63 @@ +# Copyright (c) OpenMMLab. All rights reserved. +try: + import timm +except ImportError: + timm = None + +from mmcv.cnn.bricks.registry import NORM_LAYERS +from mmcv.runner import BaseModule + +from ..builder import BACKBONES + + +@BACKBONES.register_module() +class TIMMBackbone(BaseModule): + """Wrapper to use backbones from timm library. More details can be found in + `timm `_ . + + Args: + model_name (str): Name of timm model to instantiate. + pretrained (bool): Load pretrained weights if True. + checkpoint_path (str): Path of checkpoint to load after + model is initialized. + in_channels (int): Number of input image channels. Default: 3. 
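A minimal sketch of configuring this wrapper (model name and out_indices are illustrative; extra kwargs such as out_indices are forwarded to timm.create_model, which accepts them for features_only models):

backbone = dict(
    type='TIMMBackbone',
    model_name='resnet50',
    features_only=True,
    pretrained=True,
    out_indices=(1, 2, 3, 4))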
+ init_cfg (dict, optional): Initialization config dict + **kwargs: Other timm & model specific arguments. + """ + + def __init__( + self, + model_name, + features_only=True, + pretrained=True, + checkpoint_path='', + in_channels=3, + init_cfg=None, + **kwargs, + ): + if timm is None: + raise RuntimeError('timm is not installed') + super(TIMMBackbone, self).__init__(init_cfg) + if 'norm_layer' in kwargs: + kwargs['norm_layer'] = NORM_LAYERS.get(kwargs['norm_layer']) + self.timm_model = timm.create_model( + model_name=model_name, + features_only=features_only, + pretrained=pretrained, + in_chans=in_channels, + checkpoint_path=checkpoint_path, + **kwargs, + ) + + # Make unused parameters None + self.timm_model.global_pool = None + self.timm_model.fc = None + self.timm_model.classifier = None + + # Hack to use pretrained weights from timm + if pretrained or checkpoint_path: + self._is_init = True + + def forward(self, x): + features = self.timm_model(x) + return features diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/twins.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/twins.py new file mode 100644 index 0000000..6bd9469 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/twins.py @@ -0,0 +1,588 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math +import warnings + +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import build_norm_layer +from mmcv.cnn.bricks.drop import build_dropout +from mmcv.cnn.bricks.transformer import FFN +from mmcv.cnn.utils.weight_init import (constant_init, normal_init, + trunc_normal_init) +from mmcv.runner import BaseModule, ModuleList +from torch.nn.modules.batchnorm import _BatchNorm + +from mmseg.models.backbones.mit import EfficientMultiheadAttention +from mmseg.models.builder import BACKBONES +from ..utils.embed import PatchEmbed + + +class GlobalSubsampledAttention(EfficientMultiheadAttention): + """Global Sub-sampled Attention (Spatial Reduction Attention) + + This module is modified from EfficientMultiheadAttention, + which is a module from mmseg.models.backbones.mit.py. + Specifically, there is no difference between + `GlobalSubsampledAttention` and `EfficientMultiheadAttention`, + `GlobalSubsampledAttention` is built as a brand new class + because it is renamed as `Global sub-sampled attention (GSA)` + in paper. + + + Args: + embed_dims (int): The embedding dimension. + num_heads (int): Parallel attention heads. + attn_drop (float): A Dropout layer on attn_output_weights. + Default: 0.0. + proj_drop (float): A Dropout layer after `nn.MultiheadAttention`. + Default: 0.0. + dropout_layer (obj:`ConfigDict`): The dropout_layer used + when adding the shortcut. Default: None. + batch_first (bool): Key, Query and Value are shape of + (batch, n, embed_dims) + or (n, batch, embed_dims). Default: False. + qkv_bias (bool): enable bias for qkv if True. Default: True. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN'). + sr_ratio (int): The ratio of spatial reduction of GSA of PCPVT. + Default: 1. + init_cfg (dict, optional): The Config for initialization. + Defaults to None. 
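+
+    A minimal usage sketch (illustrative, not from the upstream docs;
+    assumes the default ``batch_first=True`` token layout):
+
+    Example:
+        >>> import torch
+        >>> attn = GlobalSubsampledAttention(
+        ...     embed_dims=64, num_heads=2, sr_ratio=2)
+        >>> x = torch.rand(1, 32 * 32, 64)  # (batch, tokens, channels)
+        >>> attn(x, (32, 32)).shape
+        torch.Size([1, 1024, 64])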
+ """ + + def __init__(self, + embed_dims, + num_heads, + attn_drop=0., + proj_drop=0., + dropout_layer=None, + batch_first=True, + qkv_bias=True, + norm_cfg=dict(type='LN'), + sr_ratio=1, + init_cfg=None): + super(GlobalSubsampledAttention, self).__init__( + embed_dims, + num_heads, + attn_drop=attn_drop, + proj_drop=proj_drop, + dropout_layer=dropout_layer, + batch_first=batch_first, + qkv_bias=qkv_bias, + norm_cfg=norm_cfg, + sr_ratio=sr_ratio, + init_cfg=init_cfg) + + +class GSAEncoderLayer(BaseModule): + """Implements one encoder layer with GSA. + + Args: + embed_dims (int): The feature dimension. + num_heads (int): Parallel attention heads. + feedforward_channels (int): The hidden dimension for FFNs. + drop_rate (float): Probability of an element to be zeroed + after the feed forward layer. Default: 0.0. + attn_drop_rate (float): The drop out rate for attention layer. + Default: 0.0. + drop_path_rate (float): Stochastic depth rate. Default 0.0. + num_fcs (int): The number of fully-connected layers for FFNs. + Default: 2. + qkv_bias (bool): Enable bias for qkv if True. Default: True + act_cfg (dict): The activation config for FFNs. + Default: dict(type='GELU'). + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN'). + sr_ratio (float): Kernel_size of conv in Attention modules. Default: 1. + init_cfg (dict, optional): The Config for initialization. + Defaults to None. + """ + + def __init__(self, + embed_dims, + num_heads, + feedforward_channels, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + num_fcs=2, + qkv_bias=True, + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='LN'), + sr_ratio=1., + init_cfg=None): + super(GSAEncoderLayer, self).__init__(init_cfg=init_cfg) + + self.norm1 = build_norm_layer(norm_cfg, embed_dims, postfix=1)[1] + self.attn = GlobalSubsampledAttention( + embed_dims=embed_dims, + num_heads=num_heads, + attn_drop=attn_drop_rate, + proj_drop=drop_rate, + dropout_layer=dict(type='DropPath', drop_prob=drop_path_rate), + qkv_bias=qkv_bias, + norm_cfg=norm_cfg, + sr_ratio=sr_ratio) + + self.norm2 = build_norm_layer(norm_cfg, embed_dims, postfix=2)[1] + self.ffn = FFN( + embed_dims=embed_dims, + feedforward_channels=feedforward_channels, + num_fcs=num_fcs, + ffn_drop=drop_rate, + dropout_layer=dict(type='DropPath', drop_prob=drop_path_rate), + act_cfg=act_cfg, + add_identity=False) + + self.drop_path = build_dropout( + dict(type='DropPath', drop_prob=drop_path_rate) + ) if drop_path_rate > 0. else nn.Identity() + + def forward(self, x, hw_shape): + x = x + self.drop_path(self.attn(self.norm1(x), hw_shape, identity=0.)) + x = x + self.drop_path(self.ffn(self.norm2(x))) + return x + + +class LocallyGroupedSelfAttention(BaseModule): + """Locally-grouped Self Attention (LSA) module. + + Args: + embed_dims (int): Number of input channels. + num_heads (int): Number of attention heads. Default: 8 + qkv_bias (bool, optional): If True, add a learnable bias to q, k, v. + Default: False. + qk_scale (float | None, optional): Override default qk scale of + head_dim ** -0.5 if set. Default: None. + attn_drop_rate (float, optional): Dropout ratio of attention weight. + Default: 0.0 + proj_drop_rate (float, optional): Dropout ratio of output. Default: 0. + window_size(int): Window size of LSA. Default: 1. + init_cfg (dict, optional): The Config for initialization. + Defaults to None. 
+ """ + + def __init__(self, + embed_dims, + num_heads=8, + qkv_bias=False, + qk_scale=None, + attn_drop_rate=0., + proj_drop_rate=0., + window_size=1, + init_cfg=None): + super(LocallyGroupedSelfAttention, self).__init__(init_cfg=init_cfg) + + assert embed_dims % num_heads == 0, f'dim {embed_dims} should be ' \ + f'divided by num_heads ' \ + f'{num_heads}.' + self.embed_dims = embed_dims + self.num_heads = num_heads + head_dim = embed_dims // num_heads + self.scale = qk_scale or head_dim**-0.5 + + self.qkv = nn.Linear(embed_dims, embed_dims * 3, bias=qkv_bias) + self.attn_drop = nn.Dropout(attn_drop_rate) + self.proj = nn.Linear(embed_dims, embed_dims) + self.proj_drop = nn.Dropout(proj_drop_rate) + self.window_size = window_size + + def forward(self, x, hw_shape): + b, n, c = x.shape + h, w = hw_shape + x = x.view(b, h, w, c) + + # pad feature maps to multiples of Local-groups + pad_l = pad_t = 0 + pad_r = (self.window_size - w % self.window_size) % self.window_size + pad_b = (self.window_size - h % self.window_size) % self.window_size + x = F.pad(x, (0, 0, pad_l, pad_r, pad_t, pad_b)) + + # calculate attention mask for LSA + Hp, Wp = x.shape[1:-1] + _h, _w = Hp // self.window_size, Wp // self.window_size + mask = torch.zeros((1, Hp, Wp), device=x.device) + mask[:, -pad_b:, :].fill_(1) + mask[:, :, -pad_r:].fill_(1) + + # [B, _h, _w, window_size, window_size, C] + x = x.reshape(b, _h, self.window_size, _w, self.window_size, + c).transpose(2, 3) + mask = mask.reshape(1, _h, self.window_size, _w, + self.window_size).transpose(2, 3).reshape( + 1, _h * _w, + self.window_size * self.window_size) + # [1, _h*_w, window_size*window_size, window_size*window_size] + attn_mask = mask.unsqueeze(2) - mask.unsqueeze(3) + attn_mask = attn_mask.masked_fill(attn_mask != 0, + float(-1000.0)).masked_fill( + attn_mask == 0, float(0.0)) + + # [3, B, _w*_h, nhead, window_size*window_size, dim] + qkv = self.qkv(x).reshape(b, _h * _w, + self.window_size * self.window_size, 3, + self.num_heads, c // self.num_heads).permute( + 3, 0, 1, 4, 2, 5) + q, k, v = qkv[0], qkv[1], qkv[2] + # [B, _h*_w, n_head, window_size*window_size, window_size*window_size] + attn = (q @ k.transpose(-2, -1)) * self.scale + attn = attn + attn_mask.unsqueeze(2) + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + attn = (attn @ v).transpose(2, 3).reshape(b, _h, _w, self.window_size, + self.window_size, c) + x = attn.transpose(2, 3).reshape(b, _h * self.window_size, + _w * self.window_size, c) + if pad_r > 0 or pad_b > 0: + x = x[:, :h, :w, :].contiguous() + + x = x.reshape(b, n, c) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class LSAEncoderLayer(BaseModule): + """Implements one encoder layer in Twins-SVT. + + Args: + embed_dims (int): The feature dimension. + num_heads (int): Parallel attention heads. + feedforward_channels (int): The hidden dimension for FFNs. + drop_rate (float): Probability of an element to be zeroed + after the feed forward layer. Default: 0.0. + attn_drop_rate (float, optional): Dropout ratio of attention weight. + Default: 0.0 + drop_path_rate (float): Stochastic depth rate. Default 0.0. + num_fcs (int): The number of fully-connected layers for FFNs. + Default: 2. + qkv_bias (bool): Enable bias for qkv if True. Default: True + qk_scale (float | None, optional): Override default qk scale of + head_dim ** -0.5 if set. Default: None. + act_cfg (dict): The activation config for FFNs. + Default: dict(type='GELU'). + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN'). 
+ window_size (int): Window size of LSA. Default: 1. + init_cfg (dict, optional): The Config for initialization. + Defaults to None. + """ + + def __init__(self, + embed_dims, + num_heads, + feedforward_channels, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + num_fcs=2, + qkv_bias=True, + qk_scale=None, + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='LN'), + window_size=1, + init_cfg=None): + + super(LSAEncoderLayer, self).__init__(init_cfg=init_cfg) + + self.norm1 = build_norm_layer(norm_cfg, embed_dims, postfix=1)[1] + self.attn = LocallyGroupedSelfAttention(embed_dims, num_heads, + qkv_bias, qk_scale, + attn_drop_rate, drop_rate, + window_size) + + self.norm2 = build_norm_layer(norm_cfg, embed_dims, postfix=2)[1] + self.ffn = FFN( + embed_dims=embed_dims, + feedforward_channels=feedforward_channels, + num_fcs=num_fcs, + ffn_drop=drop_rate, + dropout_layer=dict(type='DropPath', drop_prob=drop_path_rate), + act_cfg=act_cfg, + add_identity=False) + + self.drop_path = build_dropout( + dict(type='DropPath', drop_prob=drop_path_rate) + ) if drop_path_rate > 0. else nn.Identity() + + def forward(self, x, hw_shape): + x = x + self.drop_path(self.attn(self.norm1(x), hw_shape)) + x = x + self.drop_path(self.ffn(self.norm2(x))) + return x + + +class ConditionalPositionEncoding(BaseModule): + """The Conditional Position Encoding (CPE) module. + + The CPE is the implementation of 'Conditional Positional Encodings + for Vision Transformers '_. + + Args: + in_channels (int): Number of input channels. + embed_dims (int): The feature dimension. Default: 768. + stride (int): Stride of conv layer. Default: 1. + """ + + def __init__(self, in_channels, embed_dims=768, stride=1, init_cfg=None): + super(ConditionalPositionEncoding, self).__init__(init_cfg=init_cfg) + self.proj = nn.Conv2d( + in_channels, + embed_dims, + kernel_size=3, + stride=stride, + padding=1, + bias=True, + groups=embed_dims) + self.stride = stride + + def forward(self, x, hw_shape): + b, n, c = x.shape + h, w = hw_shape + feat_token = x + cnn_feat = feat_token.transpose(1, 2).view(b, c, h, w) + if self.stride == 1: + x = self.proj(cnn_feat) + cnn_feat + else: + x = self.proj(cnn_feat) + x = x.flatten(2).transpose(1, 2) + return x + + +@BACKBONES.register_module() +class PCPVT(BaseModule): + """The backbone of Twins-PCPVT. + + This backbone is the implementation of `Twins: Revisiting the Design + of Spatial Attention in Vision Transformers + `_. + + Args: + in_channels (int): Number of input channels. Default: 3. + embed_dims (list): Embedding dimension. Default: [64, 128, 256, 512]. + patch_sizes (list): The patch sizes. Default: [4, 2, 2, 2]. + strides (list): The strides. Default: [4, 2, 2, 2]. + num_heads (int): Number of attention heads. Default: [1, 2, 4, 8]. + mlp_ratios (int): Ratio of mlp hidden dim to embedding dim. + Default: [4, 4, 4, 4]. + out_indices (tuple[int]): Output from which stages. + Default: (0, 1, 2, 3). + qkv_bias (bool): Enable bias for qkv if True. Default: False. + drop_rate (float): Probability of an element to be zeroed. + Default 0. + attn_drop_rate (float): The drop out rate for attention layer. + Default 0.0 + drop_path_rate (float): Stochastic depth rate. Default 0.0 + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN') + depths (list): Depths of each stage. Default [3, 4, 6, 3] + sr_ratios (list): Kernel_size of conv in each Attn module in + Transformer encoder layer. Default: [8, 4, 2, 1]. + norm_after_stage(bool): Add extra norm. Default False. 
+ init_cfg (dict, optional): The Config for initialization. + Defaults to None. + """ + + def __init__(self, + in_channels=3, + embed_dims=[64, 128, 256, 512], + patch_sizes=[4, 2, 2, 2], + strides=[4, 2, 2, 2], + num_heads=[1, 2, 4, 8], + mlp_ratios=[4, 4, 4, 4], + out_indices=(0, 1, 2, 3), + qkv_bias=False, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + norm_cfg=dict(type='LN'), + depths=[3, 4, 6, 3], + sr_ratios=[8, 4, 2, 1], + norm_after_stage=False, + pretrained=None, + init_cfg=None): + super(PCPVT, self).__init__(init_cfg=init_cfg) + assert not (init_cfg and pretrained), \ + 'init_cfg and pretrained cannot be set at the same time' + if isinstance(pretrained, str): + warnings.warn('DeprecationWarning: pretrained is deprecated, ' + 'please use "init_cfg" instead') + self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) + elif pretrained is not None: + raise TypeError('pretrained must be a str or None') + self.depths = depths + + # patch_embed + self.patch_embeds = ModuleList() + self.position_encoding_drops = ModuleList() + self.layers = ModuleList() + + for i in range(len(depths)): + self.patch_embeds.append( + PatchEmbed( + in_channels=in_channels if i == 0 else embed_dims[i - 1], + embed_dims=embed_dims[i], + conv_type='Conv2d', + kernel_size=patch_sizes[i], + stride=strides[i], + padding='corner', + norm_cfg=norm_cfg)) + + self.position_encoding_drops.append(nn.Dropout(p=drop_rate)) + + self.position_encodings = ModuleList([ + ConditionalPositionEncoding(embed_dim, embed_dim) + for embed_dim in embed_dims + ]) + + # transformer encoder + dpr = [ + x.item() for x in torch.linspace(0, drop_path_rate, sum(depths)) + ] # stochastic depth decay rule + cur = 0 + + for k in range(len(depths)): + _block = ModuleList([ + GSAEncoderLayer( + embed_dims=embed_dims[k], + num_heads=num_heads[k], + feedforward_channels=mlp_ratios[k] * embed_dims[k], + attn_drop_rate=attn_drop_rate, + drop_rate=drop_rate, + drop_path_rate=dpr[cur + i], + num_fcs=2, + qkv_bias=qkv_bias, + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='LN'), + sr_ratio=sr_ratios[k]) for i in range(depths[k]) + ]) + self.layers.append(_block) + cur += depths[k] + + self.norm_name, norm = build_norm_layer( + norm_cfg, embed_dims[-1], postfix=1) + + self.out_indices = out_indices + self.norm_after_stage = norm_after_stage + if self.norm_after_stage: + self.norm_list = ModuleList() + for dim in embed_dims: + self.norm_list.append(build_norm_layer(norm_cfg, dim)[1]) + + def init_weights(self): + if self.init_cfg is not None: + super(PCPVT, self).init_weights() + else: + for m in self.modules(): + if isinstance(m, nn.Linear): + trunc_normal_init(m, std=.02, bias=0.) + elif isinstance(m, (_BatchNorm, nn.GroupNorm, nn.LayerNorm)): + constant_init(m, val=1.0, bias=0.) 
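+                # Conv2d weights get a zero-mean normal init whose std is
+                # scaled by the fan-out (He-style initialization).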
+ elif isinstance(m, nn.Conv2d): + fan_out = m.kernel_size[0] * m.kernel_size[ + 1] * m.out_channels + fan_out //= m.groups + normal_init( + m, mean=0, std=math.sqrt(2.0 / fan_out), bias=0) + + def forward(self, x): + outputs = list() + + b = x.shape[0] + + for i in range(len(self.depths)): + x, hw_shape = self.patch_embeds[i](x) + h, w = hw_shape + x = self.position_encoding_drops[i](x) + for j, blk in enumerate(self.layers[i]): + x = blk(x, hw_shape) + if j == 0: + x = self.position_encodings[i](x, hw_shape) + if self.norm_after_stage: + x = self.norm_list[i](x) + x = x.reshape(b, h, w, -1).permute(0, 3, 1, 2).contiguous() + + if i in self.out_indices: + outputs.append(x) + + return tuple(outputs) + + +@BACKBONES.register_module() +class SVT(PCPVT): + """The backbone of Twins-SVT. + + This backbone is the implementation of `Twins: Revisiting the Design + of Spatial Attention in Vision Transformers + `_. + + Args: + in_channels (int): Number of input channels. Default: 3. + embed_dims (list): Embedding dimension. Default: [64, 128, 256, 512]. + patch_sizes (list): The patch sizes. Default: [4, 2, 2, 2]. + strides (list): The strides. Default: [4, 2, 2, 2]. + num_heads (int): Number of attention heads. Default: [1, 2, 4]. + mlp_ratios (int): Ratio of mlp hidden dim to embedding dim. + Default: [4, 4, 4]. + out_indices (tuple[int]): Output from which stages. + Default: (0, 1, 2, 3). + qkv_bias (bool): Enable bias for qkv if True. Default: False. + drop_rate (float): Dropout rate. Default 0. + attn_drop_rate (float): Dropout ratio of attention weight. + Default 0.0 + drop_path_rate (float): Stochastic depth rate. Default 0.2. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN') + depths (list): Depths of each stage. Default [4, 4, 4]. + sr_ratios (list): Kernel_size of conv in each Attn module in + Transformer encoder layer. Default: [4, 2, 1]. + windiow_sizes (list): Window size of LSA. Default: [7, 7, 7], + input_features_slice(bool): Input features need slice. Default: False. + norm_after_stage(bool): Add extra norm. Default False. + strides (list): Strides in patch-Embedding modules. Default: (2, 2, 2) + init_cfg (dict, optional): The Config for initialization. + Defaults to None. 
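+
+    A minimal usage sketch (illustrative, not from the upstream docs; the
+    input size must be divisible by the overall stride, 16 with the
+    defaults, and each stage's resolution by its LSA window size):
+
+    Example:
+        >>> import torch
+        >>> model = SVT()
+        >>> model.init_weights()
+        >>> outs = model(torch.rand(1, 3, 224, 224))
+        >>> len(outs)
+        3
+        >>> outs[-1].shape
+        torch.Size([1, 256, 14, 14])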
+ """ + + def __init__(self, + in_channels=3, + embed_dims=[64, 128, 256], + patch_sizes=[4, 2, 2, 2], + strides=[4, 2, 2, 2], + num_heads=[1, 2, 4], + mlp_ratios=[4, 4, 4], + out_indices=(0, 1, 2, 3), + qkv_bias=False, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.2, + norm_cfg=dict(type='LN'), + depths=[4, 4, 4], + sr_ratios=[4, 2, 1], + windiow_sizes=[7, 7, 7], + norm_after_stage=True, + pretrained=None, + init_cfg=None): + super(SVT, self).__init__(in_channels, embed_dims, patch_sizes, + strides, num_heads, mlp_ratios, out_indices, + qkv_bias, drop_rate, attn_drop_rate, + drop_path_rate, norm_cfg, depths, sr_ratios, + norm_after_stage, pretrained, init_cfg) + # transformer encoder + dpr = [ + x.item() for x in torch.linspace(0, drop_path_rate, sum(depths)) + ] # stochastic depth decay rule + + for k in range(len(depths)): + for i in range(depths[k]): + if i % 2 == 0: + self.layers[k][i] = \ + LSAEncoderLayer( + embed_dims=embed_dims[k], + num_heads=num_heads[k], + feedforward_channels=mlp_ratios[k] * embed_dims[k], + drop_rate=drop_rate, + attn_drop_rate=attn_drop_rate, + drop_path_rate=dpr[sum(depths[:k])+i], + qkv_bias=qkv_bias, + window_size=windiow_sizes[k]) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/unet.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/unet.py new file mode 100644 index 0000000..c2d3366 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/unet.py @@ -0,0 +1,438 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings + +import torch.nn as nn +import torch.utils.checkpoint as cp +from mmcv.cnn import (UPSAMPLE_LAYERS, ConvModule, build_activation_layer, + build_norm_layer) +from mmcv.runner import BaseModule +from mmcv.utils.parrots_wrapper import _BatchNorm + +from mmseg.ops import Upsample +from ..builder import BACKBONES +from ..utils import UpConvBlock + + +class BasicConvBlock(nn.Module): + """Basic convolutional block for UNet. + + This module consists of several plain convolutional layers. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + num_convs (int): Number of convolutional layers. Default: 2. + stride (int): Whether use stride convolution to downsample + the input feature map. If stride=2, it only uses stride convolution + in the first convolutional layer to downsample the input feature + map. Options are 1 or 2. Default: 1. + dilation (int): Whether use dilated convolution to expand the + receptive field. Set dilation rate of each convolutional layer and + the dilation rate of the first convolutional layer is always 1. + Default: 1. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + conv_cfg (dict | None): Config dict for convolution layer. + Default: None. + norm_cfg (dict | None): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict | None): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU'). + dcn (bool): Use deformable convolution in convolutional layer or not. + Default: None. + plugins (dict): plugins for convolutional layers. Default: None. 
+ """ + + def __init__(self, + in_channels, + out_channels, + num_convs=2, + stride=1, + dilation=1, + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + dcn=None, + plugins=None): + super(BasicConvBlock, self).__init__() + assert dcn is None, 'Not implemented yet.' + assert plugins is None, 'Not implemented yet.' + + self.with_cp = with_cp + convs = [] + for i in range(num_convs): + convs.append( + ConvModule( + in_channels=in_channels if i == 0 else out_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride if i == 0 else 1, + dilation=1 if i == 0 else dilation, + padding=1 if i == 0 else dilation, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + + self.convs = nn.Sequential(*convs) + + def forward(self, x): + """Forward function.""" + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(self.convs, x) + else: + out = self.convs(x) + return out + + +@UPSAMPLE_LAYERS.register_module() +class DeconvModule(nn.Module): + """Deconvolution upsample module in decoder for UNet (2X upsample). + + This module uses deconvolution to upsample feature map in the decoder + of UNet. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + norm_cfg (dict | None): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict | None): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU'). + kernel_size (int): Kernel size of the convolutional layer. Default: 4. + """ + + def __init__(self, + in_channels, + out_channels, + with_cp=False, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + *, + kernel_size=4, + scale_factor=2): + super(DeconvModule, self).__init__() + + assert (kernel_size - scale_factor >= 0) and\ + (kernel_size - scale_factor) % 2 == 0,\ + f'kernel_size should be greater than or equal to scale_factor '\ + f'and (kernel_size - scale_factor) should be even numbers, '\ + f'while the kernel size is {kernel_size} and scale_factor is '\ + f'{scale_factor}.' + + stride = scale_factor + padding = (kernel_size - scale_factor) // 2 + self.with_cp = with_cp + deconv = nn.ConvTranspose2d( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding) + + norm_name, norm = build_norm_layer(norm_cfg, out_channels) + activate = build_activation_layer(act_cfg) + self.deconv_upsamping = nn.Sequential(deconv, norm, activate) + + def forward(self, x): + """Forward function.""" + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(self.deconv_upsamping, x) + else: + out = self.deconv_upsamping(x) + return out + + +@UPSAMPLE_LAYERS.register_module() +class InterpConv(nn.Module): + """Interpolation upsample module in decoder for UNet. + + This module uses interpolation to upsample feature map in the decoder + of UNet. It consists of one interpolation upsample layer and one + convolutional layer. It can be one interpolation upsample layer followed + by one convolutional layer (conv_first=False) or one convolutional layer + followed by one interpolation upsample layer (conv_first=True). + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. 
+        norm_cfg (dict | None): Config dict for normalization layer.
+            Default: dict(type='BN').
+        act_cfg (dict | None): Config dict for activation layer in ConvModule.
+            Default: dict(type='ReLU').
+        conv_cfg (dict | None): Config dict for convolution layer.
+            Default: None.
+        conv_first (bool): Whether the convolutional layer comes before the
+            interpolation upsample layer. Default: False, meaning the
+            interpolation upsample layer is followed by the convolutional
+            layer.
+        kernel_size (int): Kernel size of the convolutional layer. Default: 1.
+        stride (int): Stride of the convolutional layer. Default: 1.
+        padding (int): Padding of the convolutional layer. Default: 0.
+        upsample_cfg (dict): Interpolation config of the upsample layer.
+            Default: dict(
+                scale_factor=2, mode='bilinear', align_corners=False).
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 with_cp=False,
+                 norm_cfg=dict(type='BN'),
+                 act_cfg=dict(type='ReLU'),
+                 *,
+                 conv_cfg=None,
+                 conv_first=False,
+                 kernel_size=1,
+                 stride=1,
+                 padding=0,
+                 upsample_cfg=dict(
+                     scale_factor=2, mode='bilinear', align_corners=False)):
+        super(InterpConv, self).__init__()
+
+        self.with_cp = with_cp
+        conv = ConvModule(
+            in_channels,
+            out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            conv_cfg=conv_cfg,
+            norm_cfg=norm_cfg,
+            act_cfg=act_cfg)
+        upsample = Upsample(**upsample_cfg)
+        if conv_first:
+            self.interp_upsample = nn.Sequential(conv, upsample)
+        else:
+            self.interp_upsample = nn.Sequential(upsample, conv)
+
+    def forward(self, x):
+        """Forward function."""
+
+        if self.with_cp and x.requires_grad:
+            out = cp.checkpoint(self.interp_upsample, x)
+        else:
+            out = self.interp_upsample(x)
+        return out
+
+
+@BACKBONES.register_module()
+class UNet(BaseModule):
+    """UNet backbone.
+
+    This backbone is the implementation of `U-Net: Convolutional Networks
+    for Biomedical Image Segmentation `_.
+
+    Args:
+        in_channels (int): Number of input image channels. Default: 3.
+        base_channels (int): Number of base channels of each stage.
+            The output channels of the first stage. Default: 64.
+        num_stages (int): Number of stages in encoder, normally 5. Default: 5.
+        strides (Sequence[int 1 | 2]): Strides of each stage in encoder.
+            len(strides) is equal to num_stages. Normally the stride of the
+            first stage in encoder is 1. If strides[i]=2, it uses stride
+            convolution to downsample in the corresponding encoder stage.
+            Default: (1, 1, 1, 1, 1).
+        enc_num_convs (Sequence[int]): Number of convolutional layers in the
+            convolution block of the corresponding encoder stage.
+            Default: (2, 2, 2, 2, 2).
+        dec_num_convs (Sequence[int]): Number of convolutional layers in the
+            convolution block of the corresponding decoder stage.
+            Default: (2, 2, 2, 2).
+        downsamples (Sequence[int]): Whether to use MaxPool to downsample the
+            feature map after the first stage of encoder
+            (stages: [1, num_stages)). If the corresponding encoder stage
+            uses stride convolution (strides[i]=2), it will never use MaxPool
+            to downsample, even if downsamples[i-1]=True.
+            Default: (True, True, True, True).
+        enc_dilations (Sequence[int]): Dilation rate of each stage in encoder.
+            Default: (1, 1, 1, 1, 1).
+        dec_dilations (Sequence[int]): Dilation rate of each stage in decoder.
+            Default: (1, 1, 1, 1).
+        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
+            memory while slowing down the training speed. Default: False.
+        conv_cfg (dict | None): Config dict for convolution layer.
+            Default: None.
+ norm_cfg (dict | None): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict | None): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU'). + upsample_cfg (dict): The upsample config of the upsample module in + decoder. Default: dict(type='InterpConv'). + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. + dcn (bool): Use deformable convolution in convolutional layer or not. + Default: None. + plugins (dict): plugins for convolutional layers. Default: None. + pretrained (str, optional): model pretrained path. Default: None + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None + + Notice: + The input image size should be divisible by the whole downsample rate + of the encoder. More detail of the whole downsample rate can be found + in UNet._check_input_divisible. + """ + + def __init__(self, + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False, + dcn=None, + plugins=None, + pretrained=None, + init_cfg=None): + super(UNet, self).__init__(init_cfg) + + self.pretrained = pretrained + assert not (init_cfg and pretrained), \ + 'init_cfg and pretrained cannot be setting at the same time' + if isinstance(pretrained, str): + warnings.warn('DeprecationWarning: pretrained is a deprecated, ' + 'please use "init_cfg" instead') + self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) + elif pretrained is None: + if init_cfg is None: + self.init_cfg = [ + dict(type='Kaiming', layer='Conv2d'), + dict( + type='Constant', + val=1, + layer=['_BatchNorm', 'GroupNorm']) + ] + else: + raise TypeError('pretrained must be a str or None') + + assert dcn is None, 'Not implemented yet.' + assert plugins is None, 'Not implemented yet.' + assert len(strides) == num_stages, \ + 'The length of strides should be equal to num_stages, '\ + f'while the strides is {strides}, the length of '\ + f'strides is {len(strides)}, and the num_stages is '\ + f'{num_stages}.' + assert len(enc_num_convs) == num_stages, \ + 'The length of enc_num_convs should be equal to num_stages, '\ + f'while the enc_num_convs is {enc_num_convs}, the length of '\ + f'enc_num_convs is {len(enc_num_convs)}, and the num_stages is '\ + f'{num_stages}.' + assert len(dec_num_convs) == (num_stages-1), \ + 'The length of dec_num_convs should be equal to (num_stages-1), '\ + f'while the dec_num_convs is {dec_num_convs}, the length of '\ + f'dec_num_convs is {len(dec_num_convs)}, and the num_stages is '\ + f'{num_stages}.' + assert len(downsamples) == (num_stages-1), \ + 'The length of downsamples should be equal to (num_stages-1), '\ + f'while the downsamples is {downsamples}, the length of '\ + f'downsamples is {len(downsamples)}, and the num_stages is '\ + f'{num_stages}.' + assert len(enc_dilations) == num_stages, \ + 'The length of enc_dilations should be equal to num_stages, '\ + f'while the enc_dilations is {enc_dilations}, the length of '\ + f'enc_dilations is {len(enc_dilations)}, and the num_stages is '\ + f'{num_stages}.' 
+ assert len(dec_dilations) == (num_stages-1), \ + 'The length of dec_dilations should be equal to (num_stages-1), '\ + f'while the dec_dilations is {dec_dilations}, the length of '\ + f'dec_dilations is {len(dec_dilations)}, and the num_stages is '\ + f'{num_stages}.' + self.num_stages = num_stages + self.strides = strides + self.downsamples = downsamples + self.norm_eval = norm_eval + self.base_channels = base_channels + + self.encoder = nn.ModuleList() + self.decoder = nn.ModuleList() + + for i in range(num_stages): + enc_conv_block = [] + if i != 0: + if strides[i] == 1 and downsamples[i - 1]: + enc_conv_block.append(nn.MaxPool2d(kernel_size=2)) + upsample = (strides[i] != 1 or downsamples[i - 1]) + self.decoder.append( + UpConvBlock( + conv_block=BasicConvBlock, + in_channels=base_channels * 2**i, + skip_channels=base_channels * 2**(i - 1), + out_channels=base_channels * 2**(i - 1), + num_convs=dec_num_convs[i - 1], + stride=1, + dilation=dec_dilations[i - 1], + with_cp=with_cp, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + upsample_cfg=upsample_cfg if upsample else None, + dcn=None, + plugins=None)) + + enc_conv_block.append( + BasicConvBlock( + in_channels=in_channels, + out_channels=base_channels * 2**i, + num_convs=enc_num_convs[i], + stride=strides[i], + dilation=enc_dilations[i], + with_cp=with_cp, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + dcn=None, + plugins=None)) + self.encoder.append((nn.Sequential(*enc_conv_block))) + in_channels = base_channels * 2**i + + def forward(self, x): + self._check_input_divisible(x) + enc_outs = [] + for enc in self.encoder: + x = enc(x) + enc_outs.append(x) + dec_outs = [x] + for i in reversed(range(len(self.decoder))): + x = self.decoder[i](enc_outs[i], x) + dec_outs.append(x) + + return dec_outs + + def train(self, mode=True): + """Convert the model into training mode while keep normalization layer + freezed.""" + super(UNet, self).train(mode) + if mode and self.norm_eval: + for m in self.modules(): + # trick: eval have effect on BatchNorm only + if isinstance(m, _BatchNorm): + m.eval() + + def _check_input_divisible(self, x): + h, w = x.shape[-2:] + whole_downsample_rate = 1 + for i in range(1, self.num_stages): + if self.strides[i] == 2 or self.downsamples[i - 1]: + whole_downsample_rate *= 2 + assert (h % whole_downsample_rate == 0) \ + and (w % whole_downsample_rate == 0),\ + f'The input image size {(h, w)} should be divisible by the whole '\ + f'downsample rate {whole_downsample_rate}, when num_stages is '\ + f'{self.num_stages}, strides is {self.strides}, and downsamples '\ + f'is {self.downsamples}.' diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/vit.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/vit.py new file mode 100644 index 0000000..28fcc9e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/backbones/vit.py @@ -0,0 +1,440 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import math +import warnings + +import torch +import torch.nn as nn +import torch.utils.checkpoint as cp +from mmcv.cnn import build_norm_layer +from mmcv.cnn.bricks.transformer import FFN, MultiheadAttention +from mmcv.cnn.utils.weight_init import (constant_init, kaiming_init, + trunc_normal_) +from mmcv.runner import (BaseModule, CheckpointLoader, ModuleList, + load_state_dict) +from torch.nn.modules.batchnorm import _BatchNorm +from torch.nn.modules.utils import _pair as to_2tuple + +from mmseg.ops import resize +from mmseg.utils import get_root_logger +from ..builder import BACKBONES +from ..utils import PatchEmbed + + +class TransformerEncoderLayer(BaseModule): + """Implements one encoder layer in Vision Transformer. + + Args: + embed_dims (int): The feature dimension. + num_heads (int): Parallel attention heads. + feedforward_channels (int): The hidden dimension for FFNs. + drop_rate (float): Probability of an element to be zeroed + after the feed forward layer. Default: 0.0. + attn_drop_rate (float): The drop out rate for attention layer. + Default: 0.0. + drop_path_rate (float): stochastic depth rate. Default 0.0. + num_fcs (int): The number of fully-connected layers for FFNs. + Default: 2. + qkv_bias (bool): enable bias for qkv if True. Default: True + act_cfg (dict): The activation config for FFNs. + Default: dict(type='GELU'). + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN'). + batch_first (bool): Key, Query and Value are shape of + (batch, n, embed_dim) + or (n, batch, embed_dim). Default: True. + with_cp (bool): Use checkpoint or not. Using checkpoint will save + some memory while slowing down the training speed. Default: False. + """ + + def __init__(self, + embed_dims, + num_heads, + feedforward_channels, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + num_fcs=2, + qkv_bias=True, + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='LN'), + batch_first=True, + attn_cfg=dict(), + ffn_cfg=dict(), + with_cp=False): + super(TransformerEncoderLayer, self).__init__() + + self.norm1_name, norm1 = build_norm_layer( + norm_cfg, embed_dims, postfix=1) + self.add_module(self.norm1_name, norm1) + + attn_cfg.update( + dict( + embed_dims=embed_dims, + num_heads=num_heads, + attn_drop=attn_drop_rate, + proj_drop=drop_rate, + batch_first=batch_first, + bias=qkv_bias)) + + self.build_attn(attn_cfg) + + self.norm2_name, norm2 = build_norm_layer( + norm_cfg, embed_dims, postfix=2) + self.add_module(self.norm2_name, norm2) + + ffn_cfg.update( + dict( + embed_dims=embed_dims, + feedforward_channels=feedforward_channels, + num_fcs=num_fcs, + ffn_drop=drop_rate, + dropout_layer=dict(type='DropPath', drop_prob=drop_path_rate) + if drop_path_rate > 0 else None, + act_cfg=act_cfg)) + self.build_ffn(ffn_cfg) + self.with_cp = with_cp + + def build_attn(self, attn_cfg): + self.attn = MultiheadAttention(**attn_cfg) + + def build_ffn(self, ffn_cfg): + self.ffn = FFN(**ffn_cfg) + + @property + def norm1(self): + return getattr(self, self.norm1_name) + + @property + def norm2(self): + return getattr(self, self.norm2_name) + + def forward(self, x): + + def _inner_forward(x): + x = self.attn(self.norm1(x), identity=x) + x = self.ffn(self.norm2(x), identity=x) + return x + + if self.with_cp and x.requires_grad: + x = cp.checkpoint(_inner_forward, x) + else: + x = _inner_forward(x) + return x + + +@BACKBONES.register_module() +class VisionTransformer(BaseModule): + """Vision Transformer. 
+ + This backbone is the implementation of `An Image is Worth 16x16 Words: + Transformers for Image Recognition at + Scale `_. + + Args: + img_size (int | tuple): Input image size. Default: 224. + patch_size (int): The patch size. Default: 16. + in_channels (int): Number of input channels. Default: 3. + embed_dims (int): embedding dimension. Default: 768. + num_layers (int): depth of transformer. Default: 12. + num_heads (int): number of attention heads. Default: 12. + mlp_ratio (int): ratio of mlp hidden dim to embedding dim. + Default: 4. + out_indices (list | tuple | int): Output from which stages. + Default: -1. + qkv_bias (bool): enable bias for qkv if True. Default: True. + drop_rate (float): Probability of an element to be zeroed. + Default 0.0 + attn_drop_rate (float): The drop out rate for attention layer. + Default 0.0 + drop_path_rate (float): stochastic depth rate. Default 0.0 + with_cls_token (bool): Whether concatenating class token into image + tokens as transformer input. Default: True. + output_cls_token (bool): Whether output the cls_token. If set True, + `with_cls_token` must be True. Default: False. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN') + act_cfg (dict): The activation config for FFNs. + Default: dict(type='GELU'). + patch_norm (bool): Whether to add a norm in PatchEmbed Block. + Default: False. + final_norm (bool): Whether to add a additional layer to normalize + final feature map. Default: False. + interpolate_mode (str): Select the interpolate mode for position + embeding vector resize. Default: bicubic. + num_fcs (int): The number of fully-connected layers for FFNs. + Default: 2. + norm_eval (bool): Whether to set norm layers to eval mode, namely, + freeze running stats (mean and var). Note: Effect on Batch Norm + and its variants only. Default: False. + with_cp (bool): Use checkpoint or not. Using checkpoint will save + some memory while slowing down the training speed. Default: False. + pretrained (str, optional): model pretrained path. Default: None. + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. 
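+
+    A minimal usage sketch (illustrative, not from the upstream docs; with
+    the ViT-Base defaults only the last layer's feature map is returned):
+
+    Example:
+        >>> import torch
+        >>> model = VisionTransformer(img_size=224, patch_size=16)
+        >>> model.init_weights()
+        >>> outs = model(torch.rand(1, 3, 224, 224))
+        >>> outs[0].shape
+        torch.Size([1, 768, 14, 14])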
+ """ + + def __init__(self, + img_size=224, + patch_size=16, + in_channels=3, + embed_dims=768, + num_layers=12, + num_heads=12, + mlp_ratio=4, + out_indices=-1, + qkv_bias=True, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0., + with_cls_token=True, + output_cls_token=False, + norm_cfg=dict(type='LN'), + act_cfg=dict(type='GELU'), + patch_norm=False, + final_norm=False, + interpolate_mode='bicubic', + num_fcs=2, + norm_eval=False, + with_cp=False, + pretrained=None, + init_cfg=None): + super(VisionTransformer, self).__init__(init_cfg=init_cfg) + + if isinstance(img_size, int): + img_size = to_2tuple(img_size) + elif isinstance(img_size, tuple): + if len(img_size) == 1: + img_size = to_2tuple(img_size[0]) + assert len(img_size) == 2, \ + f'The size of image should have length 1 or 2, ' \ + f'but got {len(img_size)}' + + if output_cls_token: + assert with_cls_token is True, f'with_cls_token must be True if' \ + f'set output_cls_token to True, but got {with_cls_token}' + + assert not (init_cfg and pretrained), \ + 'init_cfg and pretrained cannot be set at the same time' + if isinstance(pretrained, str): + warnings.warn('DeprecationWarning: pretrained is deprecated, ' + 'please use "init_cfg" instead') + self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) + elif pretrained is not None: + raise TypeError('pretrained must be a str or None') + + self.img_size = img_size + self.patch_size = patch_size + self.interpolate_mode = interpolate_mode + self.norm_eval = norm_eval + self.with_cp = with_cp + self.pretrained = pretrained + + self.patch_embed = PatchEmbed( + in_channels=in_channels, + embed_dims=embed_dims, + conv_type='Conv2d', + kernel_size=patch_size, + stride=patch_size, + padding='corner', + norm_cfg=norm_cfg if patch_norm else None, + init_cfg=None, + ) + + num_patches = (img_size[0] // patch_size) * \ + (img_size[1] // patch_size) + + self.with_cls_token = with_cls_token + self.output_cls_token = output_cls_token + self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dims)) + self.pos_embed = nn.Parameter( + torch.zeros(1, num_patches + 1, embed_dims)) + self.drop_after_pos = nn.Dropout(p=drop_rate) + + if isinstance(out_indices, int): + if out_indices == -1: + out_indices = num_layers - 1 + self.out_indices = [out_indices] + elif isinstance(out_indices, list) or isinstance(out_indices, tuple): + self.out_indices = out_indices + else: + raise TypeError('out_indices must be type of int, list or tuple') + + dpr = [ + x.item() for x in torch.linspace(0, drop_path_rate, num_layers) + ] # stochastic depth decay rule + + self.layers = ModuleList() + for i in range(num_layers): + self.layers.append( + TransformerEncoderLayer( + embed_dims=embed_dims, + num_heads=num_heads, + feedforward_channels=mlp_ratio * embed_dims, + attn_drop_rate=attn_drop_rate, + drop_rate=drop_rate, + drop_path_rate=dpr[i], + num_fcs=num_fcs, + qkv_bias=qkv_bias, + act_cfg=act_cfg, + norm_cfg=norm_cfg, + with_cp=with_cp, + batch_first=True)) + + self.final_norm = final_norm + if final_norm: + self.norm1_name, norm1 = build_norm_layer( + norm_cfg, embed_dims, postfix=1) + self.add_module(self.norm1_name, norm1) + + @property + def norm1(self): + return getattr(self, self.norm1_name) + + def init_weights(self): + if (isinstance(self.init_cfg, dict) + and self.init_cfg.get('type') == 'Pretrained'): + logger = get_root_logger() + checkpoint = CheckpointLoader.load_checkpoint( + self.init_cfg['checkpoint'], logger=logger, map_location='cpu') + + if 'state_dict' in checkpoint: + state_dict = 
checkpoint['state_dict']
+            else:
+                state_dict = checkpoint
+
+            if 'pos_embed' in state_dict.keys():
+                if self.pos_embed.shape != state_dict['pos_embed'].shape:
+                    logger.info(msg=f'Resize the pos_embed shape from '
+                                f'{state_dict["pos_embed"].shape} to '
+                                f'{self.pos_embed.shape}')
+                    h, w = self.img_size
+                    pos_size = int(
+                        math.sqrt(state_dict['pos_embed'].shape[1] - 1))
+                    state_dict['pos_embed'] = self.resize_pos_embed(
+                        state_dict['pos_embed'],
+                        (h // self.patch_size, w // self.patch_size),
+                        (pos_size, pos_size), self.interpolate_mode)
+
+            load_state_dict(self, state_dict, strict=False, logger=logger)
+        elif self.init_cfg is not None:
+            super(VisionTransformer, self).init_weights()
+        else:
+            # We only implement the 'jax_impl' initialization implemented at
+            # https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py#L353  # noqa: E501
+            trunc_normal_(self.pos_embed, std=.02)
+            trunc_normal_(self.cls_token, std=.02)
+            for n, m in self.named_modules():
+                if isinstance(m, nn.Linear):
+                    trunc_normal_(m.weight, std=.02)
+                    if m.bias is not None:
+                        if 'ffn' in n:
+                            nn.init.normal_(m.bias, mean=0., std=1e-6)
+                        else:
+                            nn.init.constant_(m.bias, 0)
+                elif isinstance(m, nn.Conv2d):
+                    kaiming_init(m, mode='fan_in', bias=0.)
+                elif isinstance(m, (_BatchNorm, nn.GroupNorm, nn.LayerNorm)):
+                    constant_init(m, val=1.0, bias=0.)
+
+    def _pos_embeding(self, patched_img, hw_shape, pos_embed):
+        """Position embedding method.
+
+        Resize the pos_embed, if the input image size doesn't match
+        the training size.
+        Args:
+            patched_img (torch.Tensor): The patched image, it should be
+                shape of [B, L1, C].
+            hw_shape (tuple): The downsampled image resolution.
+            pos_embed (torch.Tensor): The pos_embed weights, it should be
+                shape of [B, L2, C].
+        Return:
+            torch.Tensor: The pos encoded image feature.
+        """
+        assert patched_img.ndim == 3 and pos_embed.ndim == 3, \
+            'the shapes of patched_img and pos_embed must be [B, L, C]'
+        x_len, pos_len = patched_img.shape[1], pos_embed.shape[1]
+        if x_len != pos_len:
+            if pos_len == (self.img_size[0] // self.patch_size) * (
+                    self.img_size[1] // self.patch_size) + 1:
+                pos_h = self.img_size[0] // self.patch_size
+                pos_w = self.img_size[1] // self.patch_size
+            else:
+                raise ValueError(
+                    'Unexpected shape of pos_embed, got {}.'.format(
+                        pos_embed.shape))
+            pos_embed = self.resize_pos_embed(pos_embed, hw_shape,
+                                              (pos_h, pos_w),
+                                              self.interpolate_mode)
+        return self.drop_after_pos(patched_img + pos_embed)
+
+    @staticmethod
+    def resize_pos_embed(pos_embed, input_shape, pos_shape, mode):
+        """Resize pos_embed weights.
+
+        Resize pos_embed using bicubic interpolate method.
+        Args:
+            pos_embed (torch.Tensor): Position embedding weights.
+            input_shape (tuple): Tuple for (downsampled input image height,
+                downsampled input image width).
+            pos_shape (tuple): The resolution of the downsampled original
+                training image.
+            mode (str): Algorithm used for upsampling:
+                ``'nearest'`` | ``'linear'`` | ``'bilinear'`` | ``'bicubic'`` |
+                ``'trilinear'``. Default: ``'nearest'``
+        Return:
+            torch.Tensor: The resized pos_embed of shape [B, L_new, C]
+        """
+        assert pos_embed.ndim == 3, 'shape of pos_embed must be [B, L, C]'
+        pos_h, pos_w = pos_shape
+        # keep dim for easy deployment
+        cls_token_weight = pos_embed[:, 0:1]
+        pos_embed_weight = pos_embed[:, (-1 * pos_h * pos_w):]
+        pos_embed_weight = pos_embed_weight.reshape(
+            1, pos_h, pos_w, pos_embed.shape[2]).permute(0, 3, 1, 2)
+        pos_embed_weight = resize(
+            pos_embed_weight, size=input_shape, align_corners=False, mode=mode)
+        pos_embed_weight = torch.flatten(pos_embed_weight, 2).transpose(1, 2)
+        pos_embed = torch.cat((cls_token_weight, pos_embed_weight), dim=1)
+        return pos_embed
+
+    def forward(self, inputs):
+        B = inputs.shape[0]
+
+        x, hw_shape = self.patch_embed(inputs)
+
+        # stole cls_tokens impl from Phil Wang, thanks
+        cls_tokens = self.cls_token.expand(B, -1, -1)
+        x = torch.cat((cls_tokens, x), dim=1)
+        x = self._pos_embeding(x, hw_shape, self.pos_embed)
+
+        if not self.with_cls_token:
+            # Remove class token for transformer encoder input
+            x = x[:, 1:]
+
+        outs = []
+        for i, layer in enumerate(self.layers):
+            x = layer(x)
+            if i == len(self.layers) - 1:
+                if self.final_norm:
+                    x = self.norm1(x)
+            if i in self.out_indices:
+                if self.with_cls_token:
+                    # Remove class token and reshape token for decoder head
+                    out = x[:, 1:]
+                else:
+                    out = x
+                B, _, C = out.shape
+                out = out.reshape(B, hw_shape[0], hw_shape[1],
+                                  C).permute(0, 3, 1, 2).contiguous()
+                if self.output_cls_token:
+                    out = [out, x[:, 0]]
+                outs.append(out)
+
+        return tuple(outs)
+
+    def train(self, mode=True):
+        super(VisionTransformer, self).train(mode)
+        if mode and self.norm_eval:
+            for m in self.modules():
+                if isinstance(m, nn.LayerNorm):
+                    m.eval()
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/builder.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/builder.py
new file mode 100644
index 0000000..5e18e4e
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/builder.py
@@ -0,0 +1,49 @@
+# Copyright (c) OpenMMLab. All rights reserved.
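+# A minimal usage sketch (illustrative, not part of the upstream file):
+# every alias below points at one shared registry, so any component with
+# a registered ``type`` can be built from a plain config dict, e.g.
+#
+#     from mmseg.models import build_segmentor
+#     model = build_segmentor(dict(type='EncoderDecoder', ...))
+#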
+import warnings
+
+from mmcv.cnn import MODELS as MMCV_MODELS
+from mmcv.cnn.bricks.registry import ATTENTION as MMCV_ATTENTION
+from mmcv.utils import Registry
+
+MODELS = Registry('models', parent=MMCV_MODELS)
+ATTENTION = Registry('attention', parent=MMCV_ATTENTION)
+
+BACKBONES = MODELS
+NECKS = MODELS
+HEADS = MODELS
+LOSSES = MODELS
+SEGMENTORS = MODELS
+
+
+def build_backbone(cfg):
+    """Build backbone."""
+    return BACKBONES.build(cfg)
+
+
+def build_neck(cfg):
+    """Build neck."""
+    return NECKS.build(cfg)
+
+
+def build_head(cfg):
+    """Build head."""
+    return HEADS.build(cfg)
+
+
+def build_loss(cfg):
+    """Build loss."""
+    return LOSSES.build(cfg)
+
+
+def build_segmentor(cfg, train_cfg=None, test_cfg=None):
+    """Build segmentor."""
+    if train_cfg is not None or test_cfg is not None:
+        warnings.warn(
+            'train_cfg and test_cfg are deprecated, '
+            'please specify them in model', UserWarning)
+    assert cfg.get('train_cfg') is None or train_cfg is None, \
+        'train_cfg specified in both outer field and model field '
+    assert cfg.get('test_cfg') is None or test_cfg is None, \
+        'test_cfg specified in both outer field and model field '
+    return SEGMENTORS.build(
+        cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg))
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/__init__.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/__init__.py
new file mode 100644
index 0000000..8add761
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/__init__.py
@@ -0,0 +1,40 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from .ann_head import ANNHead
+from .apc_head import APCHead
+from .aspp_head import ASPPHead
+from .cc_head import CCHead
+from .da_head import DAHead
+from .dm_head import DMHead
+from .dnl_head import DNLHead
+from .dpt_head import DPTHead
+from .ema_head import EMAHead
+from .enc_head import EncHead
+from .fcn_head import FCNHead
+from .fpn_head import FPNHead
+from .gc_head import GCHead
+from .isa_head import ISAHead
+from .knet_head import IterativeDecodeHead, KernelUpdateHead, KernelUpdator
+from .lraspp_head import LRASPPHead
+from .nl_head import NLHead
+from .ocr_head import OCRHead
+from .point_head import PointHead
+from .psa_head import PSAHead
+from .psp_head import PSPHead
+from .segformer_head import SegformerHead
+from .segmenter_mask_head import SegmenterMaskTransformerHead
+from .sep_aspp_head import DepthwiseSeparableASPPHead
+from .sep_fcn_head import DepthwiseSeparableFCNHead
+from .setr_mla_head import SETRMLAHead
+from .setr_up_head import SETRUPHead
+from .stdc_head import STDCHead
+from .uper_head import UPerHead
+
+__all__ = [
+    'FCNHead', 'PSPHead', 'ASPPHead', 'PSAHead', 'NLHead', 'GCHead', 'CCHead',
+    'UPerHead', 'DepthwiseSeparableASPPHead', 'ANNHead', 'DAHead', 'OCRHead',
+    'EncHead', 'DepthwiseSeparableFCNHead', 'FPNHead', 'EMAHead', 'DNLHead',
+    'PointHead', 'APCHead', 'DMHead', 'LRASPPHead', 'SETRUPHead',
+    'SETRMLAHead', 'DPTHead', 'SegmenterMaskTransformerHead',
+    'SegformerHead', 'ISAHead', 'STDCHead', 'IterativeDecodeHead',
+    'KernelUpdateHead', 'KernelUpdator'
+]
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/ann_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/ann_head.py
new file mode 100644
index 0000000..c8d882e
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/ann_head.py
@@ -0,0 +1,246 @@
+# 
Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule + +from ..builder import HEADS +from ..utils import SelfAttentionBlock as _SelfAttentionBlock +from .decode_head import BaseDecodeHead + + +class PPMConcat(nn.ModuleList): + """Pyramid Pooling Module that only concat the features of each layer. + + Args: + pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module. + """ + + def __init__(self, pool_scales=(1, 3, 6, 8)): + super(PPMConcat, self).__init__( + [nn.AdaptiveAvgPool2d(pool_scale) for pool_scale in pool_scales]) + + def forward(self, feats): + """Forward function.""" + ppm_outs = [] + for ppm in self: + ppm_out = ppm(feats) + ppm_outs.append(ppm_out.view(*feats.shape[:2], -1)) + concat_outs = torch.cat(ppm_outs, dim=2) + return concat_outs + + +class SelfAttentionBlock(_SelfAttentionBlock): + """Make a ANN used SelfAttentionBlock. + + Args: + low_in_channels (int): Input channels of lower level feature, + which is the key feature for self-attention. + high_in_channels (int): Input channels of higher level feature, + which is the query feature for self-attention. + channels (int): Output channels of key/query transform. + out_channels (int): Output channels. + share_key_query (bool): Whether share projection weight between key + and query projection. + query_scale (int): The scale of query feature map. + key_pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module of key feature. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict|None): Config of activation layers. + """ + + def __init__(self, low_in_channels, high_in_channels, channels, + out_channels, share_key_query, query_scale, key_pool_scales, + conv_cfg, norm_cfg, act_cfg): + key_psp = PPMConcat(key_pool_scales) + if query_scale > 1: + query_downsample = nn.MaxPool2d(kernel_size=query_scale) + else: + query_downsample = None + super(SelfAttentionBlock, self).__init__( + key_in_channels=low_in_channels, + query_in_channels=high_in_channels, + channels=channels, + out_channels=out_channels, + share_key_query=share_key_query, + query_downsample=query_downsample, + key_downsample=key_psp, + key_query_num_convs=1, + key_query_norm=True, + value_out_num_convs=1, + value_out_norm=False, + matmul_norm=True, + with_out=True, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + +class AFNB(nn.Module): + """Asymmetric Fusion Non-local Block(AFNB) + + Args: + low_in_channels (int): Input channels of lower level feature, + which is the key feature for self-attention. + high_in_channels (int): Input channels of higher level feature, + which is the query feature for self-attention. + channels (int): Output channels of key/query transform. + out_channels (int): Output channels. + and query projection. + query_scales (tuple[int]): The scales of query feature map. + Default: (1,) + key_pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module of key feature. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict|None): Config of activation layers. 
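+
+    A minimal usage sketch (illustrative, not from the upstream docs; the
+    channel and scale values below are arbitrary):
+
+    Example:
+        >>> import torch
+        >>> afnb = AFNB(low_in_channels=256, high_in_channels=512,
+        ...             channels=64, out_channels=512, query_scales=(1, ),
+        ...             key_pool_scales=(1, 3, 6, 8), conv_cfg=None,
+        ...             norm_cfg=dict(type='BN'), act_cfg=dict(type='ReLU'))
+        >>> low = torch.rand(1, 256, 32, 32)
+        >>> high = torch.rand(1, 512, 16, 16)
+        >>> afnb(low, high).shape  # keeps the high-level resolution
+        torch.Size([1, 512, 16, 16])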
+ """ + + def __init__(self, low_in_channels, high_in_channels, channels, + out_channels, query_scales, key_pool_scales, conv_cfg, + norm_cfg, act_cfg): + super(AFNB, self).__init__() + self.stages = nn.ModuleList() + for query_scale in query_scales: + self.stages.append( + SelfAttentionBlock( + low_in_channels=low_in_channels, + high_in_channels=high_in_channels, + channels=channels, + out_channels=out_channels, + share_key_query=False, + query_scale=query_scale, + key_pool_scales=key_pool_scales, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + self.bottleneck = ConvModule( + out_channels + high_in_channels, + out_channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None) + + def forward(self, low_feats, high_feats): + """Forward function.""" + priors = [stage(high_feats, low_feats) for stage in self.stages] + context = torch.stack(priors, dim=0).sum(dim=0) + output = self.bottleneck(torch.cat([context, high_feats], 1)) + return output + + +class APNB(nn.Module): + """Asymmetric Pyramid Non-local Block (APNB) + + Args: + in_channels (int): Input channels of key/query feature, + which is the key feature for self-attention. + channels (int): Output channels of key/query transform. + out_channels (int): Output channels. + query_scales (tuple[int]): The scales of query feature map. + Default: (1,) + key_pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module of key feature. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict|None): Config of activation layers. + """ + + def __init__(self, in_channels, channels, out_channels, query_scales, + key_pool_scales, conv_cfg, norm_cfg, act_cfg): + super(APNB, self).__init__() + self.stages = nn.ModuleList() + for query_scale in query_scales: + self.stages.append( + SelfAttentionBlock( + low_in_channels=in_channels, + high_in_channels=in_channels, + channels=channels, + out_channels=out_channels, + share_key_query=True, + query_scale=query_scale, + key_pool_scales=key_pool_scales, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + self.bottleneck = ConvModule( + 2 * in_channels, + out_channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + def forward(self, feats): + """Forward function.""" + priors = [stage(feats, feats) for stage in self.stages] + context = torch.stack(priors, dim=0).sum(dim=0) + output = self.bottleneck(torch.cat([context, feats], 1)) + return output + + +@HEADS.register_module() +class ANNHead(BaseDecodeHead): + """Asymmetric Non-local Neural Networks for Semantic Segmentation. + + This head is the implementation of `ANNNet + `_. + + Args: + project_channels (int): Projection channels for Nonlocal. + query_scales (tuple[int]): The scales of query feature map. + Default: (1,) + key_pool_scales (tuple[int]): The pooling scales of key feature map. + Default: (1, 3, 6, 8). 
+ """ + + def __init__(self, + project_channels, + query_scales=(1, ), + key_pool_scales=(1, 3, 6, 8), + **kwargs): + super(ANNHead, self).__init__( + input_transform='multiple_select', **kwargs) + assert len(self.in_channels) == 2 + low_in_channels, high_in_channels = self.in_channels + self.project_channels = project_channels + self.fusion = AFNB( + low_in_channels=low_in_channels, + high_in_channels=high_in_channels, + out_channels=high_in_channels, + channels=project_channels, + query_scales=query_scales, + key_pool_scales=key_pool_scales, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.bottleneck = ConvModule( + high_in_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.context = APNB( + in_channels=self.channels, + out_channels=self.channels, + channels=project_channels, + query_scales=query_scales, + key_pool_scales=key_pool_scales, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + low_feats, high_feats = self._transform_inputs(inputs) + output = self.fusion(low_feats, high_feats) + output = self.dropout(output) + output = self.bottleneck(output) + output = self.context(output) + output = self.cls_seg(output) + + return output diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/apc_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/apc_head.py new file mode 100644 index 0000000..3198fd1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/apc_head.py @@ -0,0 +1,159 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import ConvModule + +from mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +class ACM(nn.Module): + """Adaptive Context Module used in APCNet. + + Args: + pool_scale (int): Pooling scale used in Adaptive Context + Module to extract region features. + fusion (bool): Add one conv to fuse residual feature. + in_channels (int): Input channels. + channels (int): Channels after modules, before conv_seg. + conv_cfg (dict | None): Config of conv layers. + norm_cfg (dict | None): Config of norm layers. + act_cfg (dict): Config of activation layers. 
+ """ + + def __init__(self, pool_scale, fusion, in_channels, channels, conv_cfg, + norm_cfg, act_cfg): + super(ACM, self).__init__() + self.pool_scale = pool_scale + self.fusion = fusion + self.in_channels = in_channels + self.channels = channels + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.pooled_redu_conv = ConvModule( + self.in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + self.input_redu_conv = ConvModule( + self.in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + self.global_info = ConvModule( + self.channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + self.gla = nn.Conv2d(self.channels, self.pool_scale**2, 1, 1, 0) + + self.residual_conv = ConvModule( + self.channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + if self.fusion: + self.fusion_conv = ConvModule( + self.channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, x): + """Forward function.""" + pooled_x = F.adaptive_avg_pool2d(x, self.pool_scale) + # [batch_size, channels, h, w] + x = self.input_redu_conv(x) + # [batch_size, channels, pool_scale, pool_scale] + pooled_x = self.pooled_redu_conv(pooled_x) + batch_size = x.size(0) + # [batch_size, pool_scale * pool_scale, channels] + pooled_x = pooled_x.view(batch_size, self.channels, + -1).permute(0, 2, 1).contiguous() + # [batch_size, h * w, pool_scale * pool_scale] + affinity_matrix = self.gla(x + resize( + self.global_info(F.adaptive_avg_pool2d(x, 1)), size=x.shape[2:]) + ).permute(0, 2, 3, 1).reshape( + batch_size, -1, self.pool_scale**2) + affinity_matrix = F.sigmoid(affinity_matrix) + # [batch_size, h * w, channels] + z_out = torch.matmul(affinity_matrix, pooled_x) + # [batch_size, channels, h * w] + z_out = z_out.permute(0, 2, 1).contiguous() + # [batch_size, channels, h, w] + z_out = z_out.view(batch_size, self.channels, x.size(2), x.size(3)) + z_out = self.residual_conv(z_out) + z_out = F.relu(z_out + x) + if self.fusion: + z_out = self.fusion_conv(z_out) + + return z_out + + +@HEADS.register_module() +class APCHead(BaseDecodeHead): + """Adaptive Pyramid Context Network for Semantic Segmentation. + + This head is the implementation of + `APCNet `_. + + Args: + pool_scales (tuple[int]): Pooling scales used in Adaptive Context + Module. Default: (1, 2, 3, 6). + fusion (bool): Add one conv to fuse residual feature. 
+ """ + + def __init__(self, pool_scales=(1, 2, 3, 6), fusion=True, **kwargs): + super(APCHead, self).__init__(**kwargs) + assert isinstance(pool_scales, (list, tuple)) + self.pool_scales = pool_scales + self.fusion = fusion + acm_modules = [] + for pool_scale in self.pool_scales: + acm_modules.append( + ACM(pool_scale, + self.fusion, + self.in_channels, + self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + self.acm_modules = nn.ModuleList(acm_modules) + self.bottleneck = ConvModule( + self.in_channels + len(pool_scales) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + acm_outs = [x] + for acm_module in self.acm_modules: + acm_outs.append(acm_module(x)) + acm_outs = torch.cat(acm_outs, dim=1) + output = self.bottleneck(acm_outs) + output = self.cls_seg(output) + return output diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/aspp_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/aspp_head.py new file mode 100644 index 0000000..7059aee --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/aspp_head.py @@ -0,0 +1,122 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule + +from mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +class ASPPModule(nn.ModuleList): + """Atrous Spatial Pyramid Pooling (ASPP) Module. + + Args: + dilations (tuple[int]): Dilation rate of each layer. + in_channels (int): Input channels. + channels (int): Channels after modules, before conv_seg. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict): Config of activation layers. + """ + + def __init__(self, dilations, in_channels, channels, conv_cfg, norm_cfg, + act_cfg): + super(ASPPModule, self).__init__() + self.dilations = dilations + self.in_channels = in_channels + self.channels = channels + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + for dilation in dilations: + self.append( + ConvModule( + self.in_channels, + self.channels, + 1 if dilation == 1 else 3, + dilation=dilation, + padding=0 if dilation == 1 else dilation, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + + def forward(self, x): + """Forward function.""" + aspp_outs = [] + for aspp_module in self: + aspp_outs.append(aspp_module(x)) + + return aspp_outs + + +@HEADS.register_module() +class ASPPHead(BaseDecodeHead): + """Rethinking Atrous Convolution for Semantic Image Segmentation. + + This head is the implementation of `DeepLabV3 + `_. + + Args: + dilations (tuple[int]): Dilation rates for ASPP module. + Default: (1, 6, 12, 18). 
+ """ + + def __init__(self, dilations=(1, 6, 12, 18), **kwargs): + super(ASPPHead, self).__init__(**kwargs) + assert isinstance(dilations, (list, tuple)) + self.dilations = dilations + self.image_pool = nn.Sequential( + nn.AdaptiveAvgPool2d(1), + ConvModule( + self.in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + self.aspp_modules = ASPPModule( + dilations, + self.in_channels, + self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.bottleneck = ConvModule( + (len(dilations) + 1) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def _forward_feature(self, inputs): + """Forward function for feature maps before classifying each pixel with + ``self.cls_seg`` fc. + + Args: + inputs (list[Tensor]): List of multi-level img features. + + Returns: + feats (Tensor): A tensor of shape (batch_size, self.channels, + H, W) which is feature map for last layer of decoder head. + """ + x = self._transform_inputs(inputs) + aspp_outs = [ + resize( + self.image_pool(x), + size=x.size()[2:], + mode='bilinear', + align_corners=self.align_corners) + ] + aspp_outs.extend(self.aspp_modules(x)) + aspp_outs = torch.cat(aspp_outs, dim=1) + feats = self.bottleneck(aspp_outs) + return feats + + def forward(self, inputs): + """Forward function.""" + output = self._forward_feature(inputs) + output = self.cls_seg(output) + return output diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/cascade_decode_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/cascade_decode_head.py new file mode 100644 index 0000000..f7c3da0 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/cascade_decode_head.py @@ -0,0 +1,58 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from abc import ABCMeta, abstractmethod + +from .decode_head import BaseDecodeHead + + +class BaseCascadeDecodeHead(BaseDecodeHead, metaclass=ABCMeta): + """Base class for cascade decode head used in + :class:`CascadeEncoderDecoder.""" + + def __init__(self, *args, **kwargs): + super(BaseCascadeDecodeHead, self).__init__(*args, **kwargs) + + @abstractmethod + def forward(self, inputs, prev_output): + """Placeholder of forward function.""" + pass + + def forward_train(self, inputs, prev_output, img_metas, gt_semantic_seg, + train_cfg): + """Forward function for training. + Args: + inputs (list[Tensor]): List of multi-level img features. + prev_output (Tensor): The output of previous decode head. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + gt_semantic_seg (Tensor): Semantic segmentation masks + used if the architecture supports semantic segmentation task. + train_cfg (dict): The training config. + + Returns: + dict[str, Tensor]: a dictionary of loss components + """ + seg_logits = self.forward(inputs, prev_output) + losses = self.losses(seg_logits, gt_semantic_seg) + + return losses + + def forward_test(self, inputs, prev_output, img_metas, test_cfg): + """Forward function for testing. + + Args: + inputs (list[Tensor]): List of multi-level img features. + prev_output (Tensor): The output of previous decode head. 
+ img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + test_cfg (dict): The testing config. + + Returns: + Tensor: Output segmentation map. + """ + return self.forward(inputs, prev_output) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/cc_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/cc_head.py new file mode 100644 index 0000000..ed19eb4 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/cc_head.py @@ -0,0 +1,43 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch + +from ..builder import HEADS +from .fcn_head import FCNHead + +try: + from mmcv.ops import CrissCrossAttention +except ModuleNotFoundError: + CrissCrossAttention = None + + +@HEADS.register_module() +class CCHead(FCNHead): + """CCNet: Criss-Cross Attention for Semantic Segmentation. + + This head is the implementation of `CCNet + `_. + + Args: + recurrence (int): Number of recurrence of Criss Cross Attention + module. Default: 2. + """ + + def __init__(self, recurrence=2, **kwargs): + if CrissCrossAttention is None: + raise RuntimeError('Please install mmcv-full for ' + 'CrissCrossAttention ops') + super(CCHead, self).__init__(num_convs=2, **kwargs) + self.recurrence = recurrence + self.cca = CrissCrossAttention(self.channels) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + output = self.convs[0](x) + for _ in range(self.recurrence): + output = self.cca(output) + output = self.convs[1](output) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + output = self.cls_seg(output) + return output diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/da_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/da_head.py new file mode 100644 index 0000000..77fd663 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/da_head.py @@ -0,0 +1,179 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn.functional as F +from mmcv.cnn import ConvModule, Scale +from torch import nn + +from mmseg.core import add_prefix +from ..builder import HEADS +from ..utils import SelfAttentionBlock as _SelfAttentionBlock +from .decode_head import BaseDecodeHead + + +class PAM(_SelfAttentionBlock): + """Position Attention Module (PAM) + + Args: + in_channels (int): Input channels of key/query feature. + channels (int): Output channels of key/query transform. 
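Stepping back to the `CCHead` just defined above: it is built from config like any registered head. A hypothetical config snippet in the usual mmseg dict style — every value below is illustrative, not taken from this repo:

```python
# Hypothetical decode_head config for CCHead; the keyword arguments come from
# BaseDecodeHead (in_channels, channels, num_classes, ...) plus `recurrence`.
decode_head = dict(
    type='CCHead',
    in_channels=2048,
    in_index=3,
    channels=512,
    recurrence=2,
    dropout_ratio=0.1,
    num_classes=19,
    norm_cfg=dict(type='SyncBN', requires_grad=True),
    align_corners=False,
    loss_decode=dict(
        type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))
```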
+ """ + + def __init__(self, in_channels, channels): + super(PAM, self).__init__( + key_in_channels=in_channels, + query_in_channels=in_channels, + channels=channels, + out_channels=in_channels, + share_key_query=False, + query_downsample=None, + key_downsample=None, + key_query_num_convs=1, + key_query_norm=False, + value_out_num_convs=1, + value_out_norm=False, + matmul_norm=False, + with_out=False, + conv_cfg=None, + norm_cfg=None, + act_cfg=None) + + self.gamma = Scale(0) + + def forward(self, x): + """Forward function.""" + out = super(PAM, self).forward(x, x) + + out = self.gamma(out) + x + return out + + +class CAM(nn.Module): + """Channel Attention Module (CAM)""" + + def __init__(self): + super(CAM, self).__init__() + self.gamma = Scale(0) + + def forward(self, x): + """Forward function.""" + batch_size, channels, height, width = x.size() + proj_query = x.view(batch_size, channels, -1) + proj_key = x.view(batch_size, channels, -1).permute(0, 2, 1) + energy = torch.bmm(proj_query, proj_key) + energy_new = torch.max( + energy, -1, keepdim=True)[0].expand_as(energy) - energy + attention = F.softmax(energy_new, dim=-1) + proj_value = x.view(batch_size, channels, -1) + + out = torch.bmm(attention, proj_value) + out = out.view(batch_size, channels, height, width) + + out = self.gamma(out) + x + return out + + +@HEADS.register_module() +class DAHead(BaseDecodeHead): + """Dual Attention Network for Scene Segmentation. + + This head is the implementation of `DANet + `_. + + Args: + pam_channels (int): The channels of Position Attention Module(PAM). + """ + + def __init__(self, pam_channels, **kwargs): + super(DAHead, self).__init__(**kwargs) + self.pam_channels = pam_channels + self.pam_in_conv = ConvModule( + self.in_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.pam = PAM(self.channels, pam_channels) + self.pam_out_conv = ConvModule( + self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.pam_conv_seg = nn.Conv2d( + self.channels, self.num_classes, kernel_size=1) + + self.cam_in_conv = ConvModule( + self.in_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.cam = CAM() + self.cam_out_conv = ConvModule( + self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.cam_conv_seg = nn.Conv2d( + self.channels, self.num_classes, kernel_size=1) + + def pam_cls_seg(self, feat): + """PAM feature classification.""" + if self.dropout is not None: + feat = self.dropout(feat) + output = self.pam_conv_seg(feat) + return output + + def cam_cls_seg(self, feat): + """CAM feature classification.""" + if self.dropout is not None: + feat = self.dropout(feat) + output = self.cam_conv_seg(feat) + return output + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + pam_feat = self.pam_in_conv(x) + pam_feat = self.pam(pam_feat) + pam_feat = self.pam_out_conv(pam_feat) + pam_out = self.pam_cls_seg(pam_feat) + + cam_feat = self.cam_in_conv(x) + cam_feat = self.cam(cam_feat) + cam_feat = self.cam_out_conv(cam_feat) + cam_out = self.cam_cls_seg(cam_feat) + + feat_sum = pam_feat + cam_feat + pam_cam_out = self.cls_seg(feat_sum) + + return pam_cam_out, pam_out, cam_out + + def forward_test(self, inputs, img_metas, test_cfg): + """Forward function for testing, only 
``pam_cam`` is used.""" + return self.forward(inputs)[0] + + def losses(self, seg_logit, seg_label): + """Compute ``pam_cam``, ``pam``, ``cam`` loss.""" + pam_cam_seg_logit, pam_seg_logit, cam_seg_logit = seg_logit + loss = dict() + loss.update( + add_prefix( + super(DAHead, self).losses(pam_cam_seg_logit, seg_label), + 'pam_cam')) + loss.update( + add_prefix( + super(DAHead, self).losses(pam_seg_logit, seg_label), 'pam')) + loss.update( + add_prefix( + super(DAHead, self).losses(cam_seg_logit, seg_label), 'cam')) + return loss diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/da_head_modify.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/da_head_modify.py new file mode 100644 index 0000000..6c94711 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/da_head_modify.py @@ -0,0 +1,364 @@ +# # Copyright (c) OpenMMLab. All rights reserved. +# import torch +# import torch.nn.functional as F +# from mmcv.cnn import ConvModule, Scale +# from torch import nn + +# from mmseg.core import add_prefix +# from ..builder import HEADS +# from ..utils import SelfAttentionBlock as _SelfAttentionBlock +# from .decode_head import BaseDecodeHead + + +# class PAM(_SelfAttentionBlock): +# """Position Attention Module (PAM) + +# Args: +# in_channels (int): Input channels of key/query feature. +# channels (int): Output channels of key/query transform. +# """ + +# def __init__(self, in_channels, channels): +# super(PAM, self).__init__( +# key_in_channels=in_channels, +# query_in_channels=in_channels, +# channels=channels, +# out_channels=in_channels, +# share_key_query=False, +# query_downsample=None, +# key_downsample=None, +# key_query_num_convs=1, +# key_query_norm=False, +# value_out_num_convs=1, +# value_out_norm=False, +# matmul_norm=False, +# with_out=False, +# conv_cfg=None, +# norm_cfg=None, +# act_cfg=None) + +# self.gamma = Scale(0) + +# def forward(self, x): +# """Forward function.""" +# out = super(PAM, self).forward(x, x) + +# out = self.gamma(out) + x +# return out + + +# class CAM(nn.Module): +# """Channel Attention Module (CAM)""" + +# def __init__(self): +# super(CAM, self).__init__() +# self.gamma = Scale(0) + +# def forward(self, x): +# """Forward function.""" +# batch_size, channels, height, width = x.size() +# proj_query = x.view(batch_size, channels, -1) +# proj_key = x.view(batch_size, channels, -1).permute(0, 2, 1) +# energy = torch.bmm(proj_query, proj_key) +# energy_new = torch.max( +# energy, -1, keepdim=True)[0].expand_as(energy) - energy +# attention = F.softmax(energy_new, dim=-1) +# proj_value = x.view(batch_size, channels, -1) + +# out = torch.bmm(attention, proj_value) +# out = out.view(batch_size, channels, height, width) + +# out = self.gamma(out) + x +# return out + + +# @HEADS.register_module() +# class DAHead(BaseDecodeHead): +# """Dual Attention Network for Scene Segmentation. + +# This head is the implementation of `DANet +# `_. + +# Args: +# pam_channels (int): The channels of Position Attention Module(PAM). 
+# """ + +# def __init__(self, pam_channels, **kwargs): +# super(DAHead, self).__init__(**kwargs) +# self.pam_channels = pam_channels + + +# self.pam_in_conv = ConvModule( +# self.in_channels, +# self.channels, +# 3, +# padding=1, +# conv_cfg=self.conv_cfg, +# norm_cfg=self.norm_cfg, +# act_cfg=self.act_cfg) +# self.pam = PAM(self.channels, pam_channels) +# self.pam_out_conv = ConvModule( +# self.channels, +# self.channels, +# 3, +# padding=1, +# conv_cfg=self.conv_cfg, +# norm_cfg=self.norm_cfg, +# act_cfg=self.act_cfg) +# self.pam_conv_seg = nn.Conv2d( +# self.channels, self.num_classes, kernel_size=1) + +# self.cam_in_conv = ConvModule( +# self.in_channels, +# self.channels, +# 3, +# padding=1, +# conv_cfg=self.conv_cfg, +# norm_cfg=self.norm_cfg, +# act_cfg=self.act_cfg) +# self.cam = CAM() +# self.cam_out_conv = ConvModule( +# self.channels, +# self.channels, +# 3, +# padding=1, +# conv_cfg=self.conv_cfg, +# norm_cfg=self.norm_cfg, +# act_cfg=self.act_cfg) +# self.cam_conv_seg = nn.Conv2d( +# self.channels, self.num_classes, kernel_size=1) + +# def pam_cls_seg(self, feat): +# """PAM feature classification.""" +# if self.dropout is not None: +# feat = self.dropout(feat) +# output = self.pam_conv_seg(feat) +# return output + +# def cam_cls_seg(self, feat): +# """CAM feature classification.""" +# if self.dropout is not None: +# feat = self.dropout(feat) +# output = self.cam_conv_seg(feat) +# return output + +# def forward(self, inputs): +# """Forward function.""" +# x = self._transform_inputs(inputs) +# pam_feat = self.pam_in_conv(x) +# pam_feat = self.pam(pam_feat) +# pam_feat = self.pam_out_conv(pam_feat) +# pam_out = self.pam_cls_seg(pam_feat) + +# cam_feat = self.cam_in_conv(x) +# cam_feat = self.cam(cam_feat) +# cam_feat = self.cam_out_conv(cam_feat) +# cam_out = self.cam_cls_seg(cam_feat) + +# feat_sum = pam_feat + cam_feat +# pam_cam_out = self.cls_seg(feat_sum) + +# return pam_cam_out, pam_out, cam_out + +# def forward_test(self, inputs, img_metas, test_cfg): +# """Forward function for testing, only ``pam_cam`` is used.""" +# return self.forward(inputs)[0] + +# def losses(self, seg_logit, seg_label): +# """Compute ``pam_cam``, ``pam``, ``cam`` loss.""" +# pam_cam_seg_logit, pam_seg_logit, cam_seg_logit = seg_logit +# loss = dict() +# loss.update( +# add_prefix( +# super(DAHead, self).losses(pam_cam_seg_logit, seg_label), +# 'pam_cam')) +# loss.update( +# add_prefix( +# super(DAHead, self).losses(pam_seg_logit, seg_label), 'pam')) +# loss.update( +# add_prefix( +# super(DAHead, self).losses(cam_seg_logit, seg_label), 'cam')) +# return loss +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn.functional as F +from mmcv.cnn import ConvModule, Scale +from torch import nn + +from mmseg.core import add_prefix +from ..builder import HEADS +from ..utils import SelfAttentionBlock as _SelfAttentionBlock +from .decode_head import BaseDecodeHead + + +class PAM(_SelfAttentionBlock): + """Position Attention Module (PAM) + + Args: + in_channels (int): Input channels of key/query feature. + channels (int): Output channels of key/query transform. 
+ """ + + def __init__(self, in_channels, channels): + super(PAM, self).__init__( + key_in_channels=in_channels, + query_in_channels=in_channels, + channels=channels, + out_channels=in_channels, + share_key_query=False, + query_downsample=None, + key_downsample=None, + key_query_num_convs=1, + key_query_norm=False, + value_out_num_convs=1, + value_out_norm=False, + matmul_norm=False, + with_out=False, + conv_cfg=None, + norm_cfg=None, + act_cfg=None) + + self.gamma = Scale(0) + + def forward(self, x): + """Forward function.""" + out = super(PAM, self).forward(x, x) + + out = self.gamma(out) + x + return out + + +class CAM(nn.Module): + """Channel Attention Module (CAM)""" + + def __init__(self): + super(CAM, self).__init__() + self.gamma = Scale(0) + + def forward(self, x): + """Forward function.""" + batch_size, channels, height, width = x.size() + proj_query = x.view(batch_size, channels, -1) + proj_key = x.view(batch_size, channels, -1).permute(0, 2, 1) + energy = torch.bmm(proj_query, proj_key) + energy_new = torch.max( + energy, -1, keepdim=True)[0].expand_as(energy) - energy + attention = F.softmax(energy_new, dim=-1) + proj_value = x.view(batch_size, channels, -1) + + out = torch.bmm(attention, proj_value) + out = out.view(batch_size, channels, height, width) + + out = self.gamma(out) + x + return out + + +@HEADS.register_module() +class DAHead(BaseDecodeHead): + """Dual Attention Network for Scene Segmentation. + + This head is the implementation of `DANet + `_. + + Args: + pam_channels (int): The channels of Position Attention Module(PAM). + """ + + def __init__(self, pam_channels, **kwargs): + super(DAHead, self).__init__(**kwargs) + self.pam_channels = pam_channels +###################################################################### + # Add normalization layer right after the input layer + self.input_norm = nn.BatchNorm2d(self.in_channels) +################################################################### + self.pam_in_conv = ConvModule( + self.in_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.pam = PAM(self.channels, pam_channels) + self.pam_out_conv = ConvModule( + self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.pam_conv_seg = nn.Conv2d( + self.channels, self.num_classes, kernel_size=1) + + self.cam_in_conv = ConvModule( + self.in_channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.cam = CAM() + self.cam_out_conv = ConvModule( + self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.cam_conv_seg = nn.Conv2d( + self.channels, self.num_classes, kernel_size=1) + + def pam_cls_seg(self, feat): + """PAM feature classification.""" + if self.dropout is not None: + feat = self.dropout(feat) + output = self.pam_conv_seg(feat) + return output + + def cam_cls_seg(self, feat): + """CAM feature classification.""" + if self.dropout is not None: + feat = self.dropout(feat) + output = self.cam_conv_seg(feat) + return output + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + pam_feat = self.pam_in_conv(x) + pam_feat = self.pam(pam_feat) + pam_feat = self.pam_out_conv(pam_feat) + pam_out = self.pam_cls_seg(pam_feat) + + cam_feat = self.cam_in_conv(x) + cam_feat = self.cam(cam_feat) + cam_feat = self.cam_out_conv(cam_feat) + cam_out 
= self.cam_cls_seg(cam_feat) + + feat_sum = pam_feat + cam_feat + pam_cam_out = self.cls_seg(feat_sum) + + return pam_cam_out, pam_out, cam_out + + def forward_test(self, inputs, img_metas, test_cfg): + """Forward function for testing, only ``pam_cam`` is used.""" + return self.forward(inputs)[0] + + def losses(self, seg_logit, seg_label): + """Compute ``pam_cam``, ``pam``, ``cam`` loss.""" + pam_cam_seg_logit, pam_seg_logit, cam_seg_logit = seg_logit + loss = dict() + loss.update( + add_prefix( + super(DAHead, self).losses(pam_cam_seg_logit, seg_label), + 'pam_cam')) + loss.update( + add_prefix( + super(DAHead, self).losses(pam_seg_logit, seg_label), 'pam')) + loss.update( + add_prefix( + super(DAHead, self).losses(cam_seg_logit, seg_label), 'cam')) + return loss diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/decode_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/decode_head.py new file mode 100644 index 0000000..d08b1d0 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/decode_head.py @@ -0,0 +1,266 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from abc import ABCMeta, abstractmethod + +import torch +import torch.nn as nn +from mmcv.runner import BaseModule, auto_fp16, force_fp32 + +from mmseg.core import build_pixel_sampler +from mmseg.ops import resize +from ..builder import build_loss +from ..losses import accuracy + + +class BaseDecodeHead(BaseModule, metaclass=ABCMeta): + """Base class for BaseDecodeHead. + + Args: + in_channels (int|Sequence[int]): Input channels. + channels (int): Channels after modules, before conv_seg. + num_classes (int): Number of classes. + dropout_ratio (float): Ratio of dropout layer. Default: 0.1. + conv_cfg (dict|None): Config of conv layers. Default: None. + norm_cfg (dict|None): Config of norm layers. Default: None. + act_cfg (dict): Config of activation layers. + Default: dict(type='ReLU') + in_index (int|Sequence[int]): Input feature index. Default: -1 + input_transform (str|None): Transformation type of input features. + Options: 'resize_concat', 'multiple_select', None. + 'resize_concat': Multiple feature maps will be resize to the + same size as first one and than concat together. + Usually used in FCN head of HRNet. + 'multiple_select': Multiple feature maps will be bundle into + a list and passed into decode head. + None: Only one select feature map is allowed. + Default: None. + loss_decode (dict | Sequence[dict]): Config of decode loss. + The `loss_name` is property of corresponding loss function which + could be shown in training log. If you want this loss + item to be included into the backward graph, `loss_` must be the + prefix of the name. Defaults to 'loss_ce'. + e.g. dict(type='CrossEntropyLoss'), + [dict(type='CrossEntropyLoss', loss_name='loss_ce'), + dict(type='DiceLoss', loss_name='loss_dice')] + Default: dict(type='CrossEntropyLoss'). + ignore_index (int | None): The label index to be ignored. When using + masked BCE loss, ignore_index should be set to None. Default: 255. + sampler (dict|None): The config of segmentation map sampler. + Default: None. + align_corners (bool): align_corners argument of F.interpolate. + Default: False. + init_cfg (dict or list[dict], optional): Initialization config dict. 
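Before the implementation below, here is a plain-torch mirror of the three `input_transform` modes the Args above describe (see `_transform_inputs` further down; feature shapes are made up):

```python
import torch
import torch.nn.functional as F

feats = [torch.randn(1, 16 * 2**i, 32 // 2**i, 32 // 2**i) for i in range(4)]

x = feats[-1]                                  # None: single map, in_index=-1
xs = [feats[i] for i in (0, 3)]                # 'multiple_select': keep a list
up = [F.interpolate(f, size=feats[0].shape[2:], mode='bilinear',
                    align_corners=False) for f in feats]
cat = torch.cat(up, dim=1)                     # 'resize_concat': 16+32+64+128 ch
print(x.shape, len(xs), cat.shape)             # (1,128,4,4), 2, (1,240,32,32)
```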
+ """ + + def __init__(self, + in_channels, + channels, + *, + num_classes, + dropout_ratio=0.1, + conv_cfg=None, + norm_cfg=None, + act_cfg=dict(type='ReLU'), + in_index=-1, + input_transform=None, + loss_decode=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + ignore_index=255, + sampler=None, + align_corners=False, + init_cfg=dict( + type='Normal', std=0.01, override=dict(name='conv_seg'))): + super(BaseDecodeHead, self).__init__(init_cfg) + self._init_inputs(in_channels, in_index, input_transform) + self.channels = channels + self.num_classes = num_classes + self.dropout_ratio = dropout_ratio + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.in_index = in_index + + self.ignore_index = ignore_index + self.align_corners = align_corners + + if isinstance(loss_decode, dict): + self.loss_decode = build_loss(loss_decode) + elif isinstance(loss_decode, (list, tuple)): + self.loss_decode = nn.ModuleList() + for loss in loss_decode: + self.loss_decode.append(build_loss(loss)) + else: + raise TypeError(f'loss_decode must be a dict or sequence of dict,\ + but got {type(loss_decode)}') + + if sampler is not None: + self.sampler = build_pixel_sampler(sampler, context=self) + else: + self.sampler = None + + self.conv_seg = nn.Conv2d(channels, num_classes, kernel_size=1) + if dropout_ratio > 0: + self.dropout = nn.Dropout2d(dropout_ratio) + else: + self.dropout = None + self.fp16_enabled = False + + def extra_repr(self): + """Extra repr.""" + s = f'input_transform={self.input_transform}, ' \ + f'ignore_index={self.ignore_index}, ' \ + f'align_corners={self.align_corners}' + return s + + def _init_inputs(self, in_channels, in_index, input_transform): + """Check and initialize input transforms. + + The in_channels, in_index and input_transform must match. + Specifically, when input_transform is None, only single feature map + will be selected. So in_channels and in_index must be of type int. + When input_transform + + Args: + in_channels (int|Sequence[int]): Input channels. + in_index (int|Sequence[int]): Input feature index. + input_transform (str|None): Transformation type of input features. + Options: 'resize_concat', 'multiple_select', None. + 'resize_concat': Multiple feature maps will be resize to the + same size as first one and than concat together. + Usually used in FCN head of HRNet. + 'multiple_select': Multiple feature maps will be bundle into + a list and passed into decode head. + None: Only one select feature map is allowed. + """ + + if input_transform is not None: + assert input_transform in ['resize_concat', 'multiple_select'] + self.input_transform = input_transform + self.in_index = in_index + if input_transform is not None: + assert isinstance(in_channels, (list, tuple)) + assert isinstance(in_index, (list, tuple)) + assert len(in_channels) == len(in_index) + if input_transform == 'resize_concat': + self.in_channels = sum(in_channels) + else: + self.in_channels = in_channels + else: + assert isinstance(in_channels, int) + assert isinstance(in_index, int) + self.in_channels = in_channels + + def _transform_inputs(self, inputs): + """Transform inputs for decoder. + + Args: + inputs (list[Tensor]): List of multi-level img features. 
+ + Returns: + Tensor: The transformed inputs + """ + + if self.input_transform == 'resize_concat': + inputs = [inputs[i] for i in self.in_index] + upsampled_inputs = [ + resize( + input=x, + size=inputs[0].shape[2:], + mode='bilinear', + align_corners=self.align_corners) for x in inputs + ] + inputs = torch.cat(upsampled_inputs, dim=1) + elif self.input_transform == 'multiple_select': + inputs = [inputs[i] for i in self.in_index] + else: + inputs = inputs[self.in_index] + + return inputs + + @auto_fp16() + @abstractmethod + def forward(self, inputs): + """Placeholder of forward function.""" + pass + + def forward_train(self, inputs, img_metas, gt_semantic_seg, train_cfg): + """Forward function for training. + Args: + inputs (list[Tensor]): List of multi-level img features. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + gt_semantic_seg (Tensor): Semantic segmentation masks + used if the architecture supports semantic segmentation task. + train_cfg (dict): The training config. + + Returns: + dict[str, Tensor]: a dictionary of loss components + """ + seg_logits = self.forward(inputs) + losses = self.losses(seg_logits, gt_semantic_seg) + return losses + + def forward_test(self, inputs, img_metas, test_cfg): + """Forward function for testing. + + Args: + inputs (list[Tensor]): List of multi-level img features. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + test_cfg (dict): The testing config. + + Returns: + Tensor: Output segmentation map. + """ + return self.forward(inputs) + + def cls_seg(self, feat): + """Classify each pixel.""" + if self.dropout is not None: + feat = self.dropout(feat) + output = self.conv_seg(feat) + return output + + @force_fp32(apply_to=('seg_logit', )) + def losses(self, seg_logit, seg_label): + """Compute segmentation loss.""" + loss = dict() + seg_logit = resize( + input=seg_logit, + size=seg_label.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + if self.sampler is not None: + seg_weight = self.sampler.sample(seg_logit, seg_label) + else: + seg_weight = None + seg_label = seg_label.squeeze(1) + + if not isinstance(self.loss_decode, nn.ModuleList): + losses_decode = [self.loss_decode] + else: + losses_decode = self.loss_decode + for loss_decode in losses_decode: + if loss_decode.loss_name not in loss: + loss[loss_decode.loss_name] = loss_decode( + seg_logit, + seg_label, + weight=seg_weight, + ignore_index=self.ignore_index) + else: + loss[loss_decode.loss_name] += loss_decode( + seg_logit, + seg_label, + weight=seg_weight, + ignore_index=self.ignore_index) + + loss['acc_seg'] = accuracy( + seg_logit, seg_label, ignore_index=self.ignore_index) + return loss diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/dm_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/dm_head.py new file mode 100644 index 0000000..ffaa870 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/dm_head.py @@ -0,0 +1,141 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
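Before moving on to dm_head.py: the `losses()` implementation just above accumulates multiple `loss_decode` entries by `loss_name` — same-named losses are summed into one logged value, distinct names stay separate keys. A toy mirror of that rule:

```python
# Stand-ins for the values returned by each loss_decode module.
loss_values = [('loss_ce', 1.0), ('loss_dice', 0.4), ('loss_ce', 0.5)]
loss = {}
for name, value in loss_values:
    loss[name] = loss.get(name, 0.0) + value
print(loss)  # {'loss_ce': 1.5, 'loss_dice': 0.4}
```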
+import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import ConvModule, build_activation_layer, build_norm_layer + +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +class DCM(nn.Module): + """Dynamic Convolutional Module used in DMNet. + + Args: + filter_size (int): The filter size of generated convolution kernel + used in Dynamic Convolutional Module. + fusion (bool): Add one conv to fuse DCM output feature. + in_channels (int): Input channels. + channels (int): Channels after modules, before conv_seg. + conv_cfg (dict | None): Config of conv layers. + norm_cfg (dict | None): Config of norm layers. + act_cfg (dict): Config of activation layers. + """ + + def __init__(self, filter_size, fusion, in_channels, channels, conv_cfg, + norm_cfg, act_cfg): + super(DCM, self).__init__() + self.filter_size = filter_size + self.fusion = fusion + self.in_channels = in_channels + self.channels = channels + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.filter_gen_conv = nn.Conv2d(self.in_channels, self.channels, 1, 1, + 0) + + self.input_redu_conv = ConvModule( + self.in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + if self.norm_cfg is not None: + self.norm = build_norm_layer(self.norm_cfg, self.channels)[1] + else: + self.norm = None + self.activate = build_activation_layer(self.act_cfg) + + if self.fusion: + self.fusion_conv = ConvModule( + self.channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, x): + """Forward function.""" + generated_filter = self.filter_gen_conv( + F.adaptive_avg_pool2d(x, self.filter_size)) + x = self.input_redu_conv(x) + b, c, h, w = x.shape + # [1, b * c, h, w], c = self.channels + x = x.view(1, b * c, h, w) + # [b * c, 1, filter_size, filter_size] + generated_filter = generated_filter.view(b * c, 1, self.filter_size, + self.filter_size) + pad = (self.filter_size - 1) // 2 + if (self.filter_size - 1) % 2 == 0: + p2d = (pad, pad, pad, pad) + else: + p2d = (pad + 1, pad, pad + 1, pad) + x = F.pad(input=x, pad=p2d, mode='constant', value=0) + # [1, b * c, h, w] + output = F.conv2d(input=x, weight=generated_filter, groups=b * c) + # [b, c, h, w] + output = output.view(b, c, h, w) + if self.norm is not None: + output = self.norm(output) + output = self.activate(output) + + if self.fusion: + output = self.fusion_conv(output) + + return output + + +@HEADS.register_module() +class DMHead(BaseDecodeHead): + """Dynamic Multi-scale Filters for Semantic Segmentation. + + This head is the implementation of + `DMNet `_. + + Args: + filter_sizes (tuple[int]): The size of generated convolutional filters + used in Dynamic Convolutional Module. Default: (1, 3, 5, 7). + fusion (bool): Add one conv to fuse DCM output feature. 
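The grouped-convolution trick inside `DCM` above deserves a standalone look: batch is folded into channels so that every (sample, channel) pair gets its own generated filter in a single `F.conv2d` call. A runnable sketch with illustrative sizes and an odd filter size (the module also handles even sizes via asymmetric padding):

```python
import torch
import torch.nn.functional as F

b, c, h, w, k = 2, 8, 16, 16, 3
x = torch.randn(b, c, h, w).view(1, b * c, h, w)
filters = torch.randn(b * c, 1, k, k)   # stand-in for filter_gen_conv output
pad = (k - 1) // 2
x = F.pad(x, (pad, pad, pad, pad))
out = F.conv2d(x, filters, groups=b * c).view(b, c, h, w)
print(out.shape)  # torch.Size([2, 8, 16, 16])
```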
+ """ + + def __init__(self, filter_sizes=(1, 3, 5, 7), fusion=False, **kwargs): + super(DMHead, self).__init__(**kwargs) + assert isinstance(filter_sizes, (list, tuple)) + self.filter_sizes = filter_sizes + self.fusion = fusion + dcm_modules = [] + for filter_size in self.filter_sizes: + dcm_modules.append( + DCM(filter_size, + self.fusion, + self.in_channels, + self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + self.dcm_modules = nn.ModuleList(dcm_modules) + self.bottleneck = ConvModule( + self.in_channels + len(filter_sizes) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + dcm_outs = [x] + for dcm_module in self.dcm_modules: + dcm_outs.append(dcm_module(x)) + dcm_outs = torch.cat(dcm_outs, dim=1) + output = self.bottleneck(dcm_outs) + output = self.cls_seg(output) + return output diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/dnl_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/dnl_head.py new file mode 100644 index 0000000..dabf154 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/dnl_head.py @@ -0,0 +1,137 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from mmcv.cnn import NonLocal2d +from torch import nn + +from ..builder import HEADS +from .fcn_head import FCNHead + + +class DisentangledNonLocal2d(NonLocal2d): + """Disentangled Non-Local Blocks. + + Args: + temperature (float): Temperature to adjust attention. Default: 0.05 + """ + + def __init__(self, *arg, temperature, **kwargs): + super().__init__(*arg, **kwargs) + self.temperature = temperature + self.conv_mask = nn.Conv2d(self.in_channels, 1, kernel_size=1) + + def embedded_gaussian(self, theta_x, phi_x): + """Embedded gaussian with temperature.""" + + # NonLocal2d pairwise_weight: [N, HxW, HxW] + pairwise_weight = torch.matmul(theta_x, phi_x) + if self.use_scale: + # theta_x.shape[-1] is `self.inter_channels` + pairwise_weight /= torch.tensor( + theta_x.shape[-1], + dtype=torch.float, + device=pairwise_weight.device)**torch.tensor( + 0.5, device=pairwise_weight.device) + pairwise_weight /= torch.tensor( + self.temperature, device=pairwise_weight.device) + pairwise_weight = pairwise_weight.softmax(dim=-1) + return pairwise_weight + + def forward(self, x): + # x: [N, C, H, W] + n = x.size(0) + + # g_x: [N, HxW, C] + g_x = self.g(x).view(n, self.inter_channels, -1) + g_x = g_x.permute(0, 2, 1) + + # theta_x: [N, HxW, C], phi_x: [N, C, HxW] + if self.mode == 'gaussian': + theta_x = x.view(n, self.in_channels, -1) + theta_x = theta_x.permute(0, 2, 1) + if self.sub_sample: + phi_x = self.phi(x).view(n, self.in_channels, -1) + else: + phi_x = x.view(n, self.in_channels, -1) + elif self.mode == 'concatenation': + theta_x = self.theta(x).view(n, self.inter_channels, -1, 1) + phi_x = self.phi(x).view(n, self.inter_channels, 1, -1) + else: + theta_x = self.theta(x).view(n, self.inter_channels, -1) + theta_x = theta_x.permute(0, 2, 1) + phi_x = self.phi(x).view(n, self.inter_channels, -1) + + # subtract mean + theta_x -= theta_x.mean(dim=-2, keepdim=True) + phi_x -= phi_x.mean(dim=-1, keepdim=True) + + pairwise_func = getattr(self, self.mode) + # pairwise_weight: [N, HxW, HxW] + pairwise_weight = pairwise_func(theta_x, phi_x) + + # y: [N, HxW, C] + y = torch.matmul(pairwise_weight, g_x) + # 
y: [N, C, H, W] + y = y.permute(0, 2, 1).contiguous().reshape(n, self.inter_channels, + *x.size()[2:]) + + # unary_mask: [N, 1, HxW] + unary_mask = self.conv_mask(x) + unary_mask = unary_mask.view(n, 1, -1) + unary_mask = unary_mask.softmax(dim=-1) + # unary_x: [N, 1, C] + unary_x = torch.matmul(unary_mask, g_x) + # unary_x: [N, C, 1, 1] + unary_x = unary_x.permute(0, 2, 1).contiguous().reshape( + n, self.inter_channels, 1, 1) + + output = x + self.conv_out(y + unary_x) + + return output + + +@HEADS.register_module() +class DNLHead(FCNHead): + """Disentangled Non-Local Neural Networks. + + This head is the implementation of `DNLNet + `_. + + Args: + reduction (int): Reduction factor of projection transform. Default: 2. + use_scale (bool): Whether to scale pairwise_weight by + sqrt(1/inter_channels). Default: False. + mode (str): The nonlocal mode. Options are 'embedded_gaussian', + 'dot_product'. Default: 'embedded_gaussian.'. + temperature (float): Temperature to adjust attention. Default: 0.05 + """ + + def __init__(self, + reduction=2, + use_scale=True, + mode='embedded_gaussian', + temperature=0.05, + **kwargs): + super(DNLHead, self).__init__(num_convs=2, **kwargs) + self.reduction = reduction + self.use_scale = use_scale + self.mode = mode + self.temperature = temperature + self.dnl_block = DisentangledNonLocal2d( + in_channels=self.channels, + reduction=self.reduction, + use_scale=self.use_scale, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + mode=self.mode, + temperature=self.temperature) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + output = self.convs[0](x) + output = self.dnl_block(output) + output = self.convs[1](output) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + output = self.cls_seg(output) + return output diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/dpt_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/dpt_head.py new file mode 100644 index 0000000..6c895d0 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/dpt_head.py @@ -0,0 +1,294 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math + +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule, Linear, build_activation_layer +from mmcv.runner import BaseModule + +from mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +class ReassembleBlocks(BaseModule): + """ViTPostProcessBlock, process cls_token in ViT backbone output and + rearrange the feature vector to feature map. + + Args: + in_channels (int): ViT feature channels. Default: 768. + out_channels (List): output channels of each stage. + Default: [96, 192, 384, 768]. + readout_type (str): Type of readout operation. Default: 'ignore'. + patch_size (int): The patch size. Default: 16. + init_cfg (dict, optional): Initialization config dict. Default: None. 
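The "disentangling" in `DisentangledNonLocal2d` above comes down to two moves: whiten query/key by removing their means over positions before the pairwise term (the removed component is handled by the separate unary path), then scale by `sqrt(C)` and the temperature. A shape-only sketch:

```python
import torch

n, c, l = 2, 16, 64                    # batch, inter_channels, H*W
theta = torch.randn(n, l, c)
phi = torch.randn(n, c, l)
theta = theta - theta.mean(dim=-2, keepdim=True)   # mean over positions
phi = phi - phi.mean(dim=-1, keepdim=True)
pairwise = (theta @ phi / (c ** 0.5 * 0.05)).softmax(dim=-1)  # temperature 0.05
print(pairwise.shape)  # torch.Size([2, 64, 64])
```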
+ """ + + def __init__(self, + in_channels=768, + out_channels=[96, 192, 384, 768], + readout_type='ignore', + patch_size=16, + init_cfg=None): + super(ReassembleBlocks, self).__init__(init_cfg) + + assert readout_type in ['ignore', 'add', 'project'] + self.readout_type = readout_type + self.patch_size = patch_size + + self.projects = nn.ModuleList([ + ConvModule( + in_channels=in_channels, + out_channels=out_channel, + kernel_size=1, + act_cfg=None, + ) for out_channel in out_channels + ]) + + self.resize_layers = nn.ModuleList([ + nn.ConvTranspose2d( + in_channels=out_channels[0], + out_channels=out_channels[0], + kernel_size=4, + stride=4, + padding=0), + nn.ConvTranspose2d( + in_channels=out_channels[1], + out_channels=out_channels[1], + kernel_size=2, + stride=2, + padding=0), + nn.Identity(), + nn.Conv2d( + in_channels=out_channels[3], + out_channels=out_channels[3], + kernel_size=3, + stride=2, + padding=1) + ]) + if self.readout_type == 'project': + self.readout_projects = nn.ModuleList() + for _ in range(len(self.projects)): + self.readout_projects.append( + nn.Sequential( + Linear(2 * in_channels, in_channels), + build_activation_layer(dict(type='GELU')))) + + def forward(self, inputs): + assert isinstance(inputs, list) + out = [] + for i, x in enumerate(inputs): + assert len(x) == 2 + x, cls_token = x[0], x[1] + feature_shape = x.shape + if self.readout_type == 'project': + x = x.flatten(2).permute((0, 2, 1)) + readout = cls_token.unsqueeze(1).expand_as(x) + x = self.readout_projects[i](torch.cat((x, readout), -1)) + x = x.permute(0, 2, 1).reshape(feature_shape) + elif self.readout_type == 'add': + x = x.flatten(2) + cls_token.unsqueeze(-1) + x = x.reshape(feature_shape) + else: + pass + x = self.projects[i](x) + x = self.resize_layers[i](x) + out.append(x) + return out + + +class PreActResidualConvUnit(BaseModule): + """ResidualConvUnit, pre-activate residual unit. + + Args: + in_channels (int): number of channels in the input feature map. + act_cfg (dict): dictionary to construct and config activation layer. + norm_cfg (dict): dictionary to construct and config norm layer. + stride (int): stride of the first block. Default: 1 + dilation (int): dilation rate for convs layers. Default: 1. + init_cfg (dict, optional): Initialization config dict. Default: None. + """ + + def __init__(self, + in_channels, + act_cfg, + norm_cfg, + stride=1, + dilation=1, + init_cfg=None): + super(PreActResidualConvUnit, self).__init__(init_cfg) + + self.conv1 = ConvModule( + in_channels, + in_channels, + 3, + stride=stride, + padding=dilation, + dilation=dilation, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + bias=False, + order=('act', 'conv', 'norm')) + + self.conv2 = ConvModule( + in_channels, + in_channels, + 3, + padding=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + bias=False, + order=('act', 'conv', 'norm')) + + def forward(self, inputs): + inputs_ = inputs.clone() + x = self.conv1(inputs) + x = self.conv2(x) + return x + inputs_ + + +class FeatureFusionBlock(BaseModule): + """FeatureFusionBlock, merge feature map from different stages. + + Args: + in_channels (int): Input channels. + act_cfg (dict): The activation config for ResidualConvUnit. + norm_cfg (dict): Config dict for normalization layer. + expand (bool): Whether expand the channels in post process block. + Default: False. + align_corners (bool): align_corner setting for bilinear upsample. + Default: True. + init_cfg (dict, optional): Initialization config dict. Default: None. 
+ """ + + def __init__(self, + in_channels, + act_cfg, + norm_cfg, + expand=False, + align_corners=True, + init_cfg=None): + super(FeatureFusionBlock, self).__init__(init_cfg) + + self.in_channels = in_channels + self.expand = expand + self.align_corners = align_corners + + self.out_channels = in_channels + if self.expand: + self.out_channels = in_channels // 2 + + self.project = ConvModule( + self.in_channels, + self.out_channels, + kernel_size=1, + act_cfg=None, + bias=True) + + self.res_conv_unit1 = PreActResidualConvUnit( + in_channels=self.in_channels, act_cfg=act_cfg, norm_cfg=norm_cfg) + self.res_conv_unit2 = PreActResidualConvUnit( + in_channels=self.in_channels, act_cfg=act_cfg, norm_cfg=norm_cfg) + + def forward(self, *inputs): + x = inputs[0] + if len(inputs) == 2: + if x.shape != inputs[1].shape: + res = resize( + inputs[1], + size=(x.shape[2], x.shape[3]), + mode='bilinear', + align_corners=False) + else: + res = inputs[1] + x = x + self.res_conv_unit1(res) + x = self.res_conv_unit2(x) + x = resize( + x, + scale_factor=2, + mode='bilinear', + align_corners=self.align_corners) + x = self.project(x) + return x + + +@HEADS.register_module() +class DPTHead(BaseDecodeHead): + """Vision Transformers for Dense Prediction. + + This head is implemented of `DPT `_. + + Args: + embed_dims (int): The embed dimension of the ViT backbone. + Default: 768. + post_process_channels (List): Out channels of post process conv + layers. Default: [96, 192, 384, 768]. + readout_type (str): Type of readout operation. Default: 'ignore'. + patch_size (int): The patch size. Default: 16. + expand_channels (bool): Whether expand the channels in post process + block. Default: False. + act_cfg (dict): The activation config for residual conv unit. + Default dict(type='ReLU'). + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). 
+ """ + + def __init__(self, + embed_dims=768, + post_process_channels=[96, 192, 384, 768], + readout_type='ignore', + patch_size=16, + expand_channels=False, + act_cfg=dict(type='ReLU'), + norm_cfg=dict(type='BN'), + **kwargs): + super(DPTHead, self).__init__(**kwargs) + + self.in_channels = self.in_channels + self.expand_channels = expand_channels + self.reassemble_blocks = ReassembleBlocks(embed_dims, + post_process_channels, + readout_type, patch_size) + + self.post_process_channels = [ + channel * math.pow(2, i) if expand_channels else channel + for i, channel in enumerate(post_process_channels) + ] + self.convs = nn.ModuleList() + for channel in self.post_process_channels: + self.convs.append( + ConvModule( + channel, + self.channels, + kernel_size=3, + padding=1, + act_cfg=None, + bias=False)) + self.fusion_blocks = nn.ModuleList() + for _ in range(len(self.convs)): + self.fusion_blocks.append( + FeatureFusionBlock(self.channels, act_cfg, norm_cfg)) + self.fusion_blocks[0].res_conv_unit1 = None + self.project = ConvModule( + self.channels, + self.channels, + kernel_size=3, + padding=1, + norm_cfg=norm_cfg) + self.num_fusion_blocks = len(self.fusion_blocks) + self.num_reassemble_blocks = len(self.reassemble_blocks.resize_layers) + self.num_post_process_channels = len(self.post_process_channels) + assert self.num_fusion_blocks == self.num_reassemble_blocks + assert self.num_reassemble_blocks == self.num_post_process_channels + + def forward(self, inputs): + assert len(inputs) == self.num_reassemble_blocks + x = self._transform_inputs(inputs) + x = self.reassemble_blocks(x) + x = [self.convs[i](feature) for i, feature in enumerate(x)] + out = self.fusion_blocks[0](x[-1]) + for i in range(1, len(self.fusion_blocks)): + out = self.fusion_blocks[i](out, x[-(i + 1)]) + out = self.project(out) + out = self.cls_seg(out) + return out diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/ema_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/ema_head.py new file mode 100644 index 0000000..f6de167 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/ema_head.py @@ -0,0 +1,169 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math + +import torch +import torch.distributed as dist +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import ConvModule + +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +def reduce_mean(tensor): + """Reduce mean when distributed training.""" + if not (dist.is_available() and dist.is_initialized()): + return tensor + tensor = tensor.clone() + dist.all_reduce(tensor.div_(dist.get_world_size()), op=dist.ReduceOp.SUM) + return tensor + + +class EMAModule(nn.Module): + """Expectation Maximization Attention Module used in EMANet. + + Args: + channels (int): Channels of the whole module. + num_bases (int): Number of bases. + num_stages (int): Number of the EM iterations. + """ + + def __init__(self, channels, num_bases, num_stages, momentum): + super(EMAModule, self).__init__() + assert num_stages >= 1, 'num_stages must be at least 1!' + self.num_bases = num_bases + self.num_stages = num_stages + self.momentum = momentum + + bases = torch.zeros(1, channels, self.num_bases) + bases.normal_(0, math.sqrt(2. 
/ self.num_bases))
+        # [1, channels, num_bases]
+        bases = F.normalize(bases, dim=1, p=2)
+        self.register_buffer('bases', bases)
+
+    def forward(self, feats):
+        """Forward function."""
+        batch_size, channels, height, width = feats.size()
+        # [batch_size, channels, height*width]
+        feats = feats.view(batch_size, channels, height * width)
+        # [batch_size, channels, num_bases]
+        bases = self.bases.repeat(batch_size, 1, 1)
+
+        with torch.no_grad():
+            for i in range(self.num_stages):
+                # [batch_size, height*width, num_bases]
+                attention = torch.einsum('bcn,bck->bnk', feats, bases)
+                attention = F.softmax(attention, dim=2)
+                # l1 norm
+                attention_normed = F.normalize(attention, dim=1, p=1)
+                # [batch_size, channels, num_bases]
+                bases = torch.einsum('bcn,bnk->bck', feats, attention_normed)
+                # l2 norm
+                bases = F.normalize(bases, dim=1, p=2)
+
+        feats_recon = torch.einsum('bck,bnk->bcn', bases, attention)
+        feats_recon = feats_recon.view(batch_size, channels, height, width)
+
+        if self.training:
+            bases = bases.mean(dim=0, keepdim=True)
+            bases = reduce_mean(bases)
+            # l2 norm
+            bases = F.normalize(bases, dim=1, p=2)
+            self.bases = (1 -
+                          self.momentum) * self.bases + self.momentum * bases
+
+        return feats_recon
+
+
+@HEADS.register_module()
+class EMAHead(BaseDecodeHead):
+    """Expectation Maximization Attention Networks for Semantic Segmentation.
+
+    This head is the implementation of `EMANet
+    <https://arxiv.org/abs/1907.13426>`_.
+
+    Args:
+        ema_channels (int): EMA module channels
+        num_bases (int): Number of bases.
+        num_stages (int): Number of the EM iterations.
+        concat_input (bool): Whether concat the input and output of convs
+            before classification layer. Default: True
+        momentum (float): Momentum to update the base. Default: 0.1.
+    """
+
+    def __init__(self,
+                 ema_channels,
+                 num_bases,
+                 num_stages,
+                 concat_input=True,
+                 momentum=0.1,
+                 **kwargs):
+        super(EMAHead, self).__init__(**kwargs)
+        self.ema_channels = ema_channels
+        self.num_bases = num_bases
+        self.num_stages = num_stages
+        self.concat_input = concat_input
+        self.momentum = momentum
+        self.ema_module = EMAModule(self.ema_channels, self.num_bases,
+                                    self.num_stages, self.momentum)
+
+        self.ema_in_conv = ConvModule(
+            self.in_channels,
+            self.ema_channels,
+            3,
+            padding=1,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+        # project (0, inf) -> (-inf, inf)
+        self.ema_mid_conv = ConvModule(
+            self.ema_channels,
+            self.ema_channels,
+            1,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=None,
+            act_cfg=None)
+        for param in self.ema_mid_conv.parameters():
+            param.requires_grad = False
+
+        self.ema_out_conv = ConvModule(
+            self.ema_channels,
+            self.ema_channels,
+            1,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=None)
+        self.bottleneck = ConvModule(
+            self.ema_channels,
+            self.channels,
+            3,
+            padding=1,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+        if self.concat_input:
+            self.conv_cat = ConvModule(
+                self.in_channels + self.channels,
+                self.channels,
+                kernel_size=3,
+                padding=1,
+                conv_cfg=self.conv_cfg,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg)
+
+    def forward(self, inputs):
+        """Forward function."""
+        x = self._transform_inputs(inputs)
+        feats = self.ema_in_conv(x)
+        identity = feats
+        feats = self.ema_mid_conv(feats)
+        recon = self.ema_module(feats)
+        recon = F.relu(recon, inplace=True)
+        recon = self.ema_out_conv(recon)
+        output = F.relu(identity + recon, inplace=True)
+        output = self.bottleneck(output)
+        if self.concat_input:
+            output = self.conv_cat(torch.cat([x, output], dim=1))
+        output = self.cls_seg(output)
+        return output
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/enc_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/enc_head.py
new file mode 100644
index 0000000..648c890
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/enc_head.py
@@ -0,0 +1,188 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import ConvModule, build_norm_layer
+
+from mmseg.ops import Encoding, resize
+from ..builder import HEADS, build_loss
+from .decode_head import BaseDecodeHead
+
+
+class EncModule(nn.Module):
+    """Encoding Module used in EncNet.
+
+    Args:
+        in_channels (int): Input channels.
+        num_codes (int): Number of code words.
+        conv_cfg (dict|None): Config of conv layers.
+        norm_cfg (dict|None): Config of norm layers.
+        act_cfg (dict): Config of activation layers.
+    """
+
+    def __init__(self, in_channels, num_codes, conv_cfg, norm_cfg, act_cfg):
+        super(EncModule, self).__init__()
+        self.encoding_project = ConvModule(
+            in_channels,
+            in_channels,
+            1,
+            conv_cfg=conv_cfg,
+            norm_cfg=norm_cfg,
+            act_cfg=act_cfg)
+        # TODO: resolve this hack
+        # change to 1d
+        if norm_cfg is not None:
+            encoding_norm_cfg = norm_cfg.copy()
+            if encoding_norm_cfg['type'] in ['BN', 'IN']:
+                encoding_norm_cfg['type'] += '1d'
+            else:
+                encoding_norm_cfg['type'] = encoding_norm_cfg['type'].replace(
+                    '2d', '1d')
+        else:
+            # fallback to BN1d
+            encoding_norm_cfg = dict(type='BN1d')
+        self.encoding = nn.Sequential(
+            Encoding(channels=in_channels, num_codes=num_codes),
+            build_norm_layer(encoding_norm_cfg, num_codes)[1],
+            nn.ReLU(inplace=True))
+        self.fc = nn.Sequential(
+            nn.Linear(in_channels, in_channels), nn.Sigmoid())
+
+    def forward(self, x):
+        """Forward function."""
+        encoding_projection = self.encoding_project(x)
+        encoding_feat = self.encoding(encoding_projection).mean(dim=1)
+        batch_size, channels, _, _ = x.size()
+        gamma = self.fc(encoding_feat)
+        y = gamma.view(batch_size, channels, 1, 1)
+        output = F.relu_(x + x * y)
+        return encoding_feat, output
+
+
+@HEADS.register_module()
+class EncHead(BaseDecodeHead):
+    """Context Encoding for Semantic Segmentation.
+
+    This head is the implementation of `EncNet
+    <https://arxiv.org/abs/1803.08904>`_.
+
+    Args:
+        num_codes (int): Number of code words. Default: 32.
+        use_se_loss (bool): Whether use Semantic Encoding Loss (SE-loss) to
+            regularize the training. Default: True.
+        add_lateral (bool): Whether use lateral connection to fuse features.
+            Default: False.
+        loss_se_decode (dict): Config of decode loss.
+            Default: dict(type='CrossEntropyLoss', use_sigmoid=True).
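+
+    Example:
+        A minimal construction sketch (editor's illustration; the channel
+        and class counts below are placeholders, not this project's values):
+
+        >>> head = EncHead(in_channels=[512, 1024, 2048], in_index=(1, 2, 3),
+        ...                channels=512, num_codes=32, num_classes=19)
+        >>> # training forward returns (seg_logits, se_logits); the second
+        >>> # term is scored by loss_se_decode against per-image class labels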
+ """ + + def __init__(self, + num_codes=32, + use_se_loss=True, + add_lateral=False, + loss_se_decode=dict( + type='CrossEntropyLoss', + use_sigmoid=True, + loss_weight=0.2), + **kwargs): + super(EncHead, self).__init__( + input_transform='multiple_select', **kwargs) + self.use_se_loss = use_se_loss + self.add_lateral = add_lateral + self.num_codes = num_codes + self.bottleneck = ConvModule( + self.in_channels[-1], + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + if add_lateral: + self.lateral_convs = nn.ModuleList() + for in_channels in self.in_channels[:-1]: # skip the last one + self.lateral_convs.append( + ConvModule( + in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + self.fusion = ConvModule( + len(self.in_channels) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.enc_module = EncModule( + self.channels, + num_codes=num_codes, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + if self.use_se_loss: + self.loss_se_decode = build_loss(loss_se_decode) + self.se_layer = nn.Linear(self.channels, self.num_classes) + + def forward(self, inputs): + """Forward function.""" + inputs = self._transform_inputs(inputs) + feat = self.bottleneck(inputs[-1]) + if self.add_lateral: + laterals = [ + resize( + lateral_conv(inputs[i]), + size=feat.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + for i, lateral_conv in enumerate(self.lateral_convs) + ] + feat = self.fusion(torch.cat([feat, *laterals], 1)) + encode_feat, output = self.enc_module(feat) + output = self.cls_seg(output) + if self.use_se_loss: + se_output = self.se_layer(encode_feat) + return output, se_output + else: + return output + + def forward_test(self, inputs, img_metas, test_cfg): + """Forward function for testing, ignore se_loss.""" + if self.use_se_loss: + return self.forward(inputs)[0] + else: + return self.forward(inputs) + + @staticmethod + def _convert_to_onehot_labels(seg_label, num_classes): + """Convert segmentation label to onehot. + + Args: + seg_label (Tensor): Segmentation label of shape (N, H, W). + num_classes (int): Number of classes. + + Returns: + Tensor: Onehot labels of shape (N, num_classes). + """ + + batch_size = seg_label.size(0) + onehot_labels = seg_label.new_zeros((batch_size, num_classes)) + for i in range(batch_size): + hist = seg_label[i].float().histc( + bins=num_classes, min=0, max=num_classes - 1) + onehot_labels[i] = hist > 0 + return onehot_labels + + def losses(self, seg_logit, seg_label): + """Compute segmentation and semantic encoding loss.""" + seg_logit, se_seg_logit = seg_logit + loss = dict() + loss.update(super(EncHead, self).losses(seg_logit, seg_label)) + se_loss = self.loss_se_decode( + se_seg_logit, + self._convert_to_onehot_labels(seg_label, self.num_classes)) + loss['loss_se'] = se_loss + return loss diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/fcn_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/fcn_head.py new file mode 100644 index 0000000..fb79a0d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/fcn_head.py @@ -0,0 +1,96 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import torch
+import torch.nn as nn
+from mmcv.cnn import ConvModule
+
+from ..builder import HEADS
+from .decode_head import BaseDecodeHead
+
+
+@HEADS.register_module()
+class FCNHead(BaseDecodeHead):
+    """Fully Convolution Networks for Semantic Segmentation.
+
+    This head is the implementation of `FCNNet
+    <https://arxiv.org/abs/1411.4038>`_.
+
+    Args:
+        num_convs (int): Number of convs in the head. Default: 2.
+        kernel_size (int): The kernel size for convs in the head. Default: 3.
+        concat_input (bool): Whether concat the input and output of convs
+            before classification layer.
+        dilation (int): The dilation rate for convs in the head. Default: 1.
+    """
+
+    def __init__(self,
+                 num_convs=2,
+                 kernel_size=3,
+                 concat_input=True,
+                 dilation=1,
+                 **kwargs):
+        assert num_convs >= 0 and dilation > 0 and isinstance(dilation, int)
+        self.num_convs = num_convs
+        self.concat_input = concat_input
+        self.kernel_size = kernel_size
+        super(FCNHead, self).__init__(**kwargs)
+        if num_convs == 0:
+            assert self.in_channels == self.channels
+
+        conv_padding = (kernel_size // 2) * dilation
+        convs = []
+        convs.append(
+            ConvModule(
+                self.in_channels,
+                self.channels,
+                kernel_size=kernel_size,
+                padding=conv_padding,
+                dilation=dilation,
+                conv_cfg=self.conv_cfg,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg))
+        for i in range(num_convs - 1):
+            convs.append(
+                ConvModule(
+                    self.channels,
+                    self.channels,
+                    kernel_size=kernel_size,
+                    padding=conv_padding,
+                    dilation=dilation,
+                    conv_cfg=self.conv_cfg,
+                    norm_cfg=self.norm_cfg,
+                    act_cfg=self.act_cfg))
+        if num_convs == 0:
+            self.convs = nn.Identity()
+        else:
+            self.convs = nn.Sequential(*convs)
+        if self.concat_input:
+            self.conv_cat = ConvModule(
+                self.in_channels + self.channels,
+                self.channels,
+                kernel_size=kernel_size,
+                padding=kernel_size // 2,
+                conv_cfg=self.conv_cfg,
+                norm_cfg=self.norm_cfg,
+                act_cfg=self.act_cfg)
+
+    def _forward_feature(self, inputs):
+        """Forward function for feature maps before classifying each pixel with
+        ``self.cls_seg`` fc.
+
+        Args:
+            inputs (list[Tensor]): List of multi-level img features.
+
+        Returns:
+            feats (Tensor): A tensor of shape (batch_size, self.channels,
+                H, W) which is feature map for last layer of decoder head.
+        """
+        x = self._transform_inputs(inputs)
+        feats = self.convs(x)
+        if self.concat_input:
+            feats = self.conv_cat(torch.cat([x, feats], dim=1))
+        return feats
+
+    def forward(self, inputs):
+        """Forward function."""
+        output = self._forward_feature(inputs)
+        output = self.cls_seg(output)
+        return output
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/fpn_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/fpn_head.py
new file mode 100644
index 0000000..e41f324
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/fpn_head.py
@@ -0,0 +1,69 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import numpy as np
+import torch.nn as nn
+from mmcv.cnn import ConvModule
+
+from mmseg.ops import Upsample, resize
+from ..builder import HEADS
+from .decode_head import BaseDecodeHead
+
+
+@HEADS.register_module()
+class FPNHead(BaseDecodeHead):
+    """Panoptic Feature Pyramid Networks.
+
+    This head is the implementation of `Semantic FPN
+    <https://arxiv.org/abs/1901.02446>`_.
+
+    Args:
+        feature_strides (tuple[int]): The strides for input feature maps.
+            All strides are supposed to be powers of 2. The first
+            one is of largest resolution.
+    """
+
+    def __init__(self, feature_strides, **kwargs):
+        super(FPNHead, self).__init__(
+            input_transform='multiple_select', **kwargs)
+        assert len(feature_strides) == len(self.in_channels)
+        assert min(feature_strides) == feature_strides[0]
+        self.feature_strides = feature_strides
+
+        self.scale_heads = nn.ModuleList()
+        for i in range(len(feature_strides)):
+            head_length = max(
+                1,
+                int(np.log2(feature_strides[i]) - np.log2(feature_strides[0])))
+            scale_head = []
+            for k in range(head_length):
+                scale_head.append(
+                    ConvModule(
+                        self.in_channels[i] if k == 0 else self.channels,
+                        self.channels,
+                        3,
+                        padding=1,
+                        conv_cfg=self.conv_cfg,
+                        norm_cfg=self.norm_cfg,
+                        act_cfg=self.act_cfg))
+                if feature_strides[i] != feature_strides[0]:
+                    scale_head.append(
+                        Upsample(
+                            scale_factor=2,
+                            mode='bilinear',
+                            align_corners=self.align_corners))
+            self.scale_heads.append(nn.Sequential(*scale_head))
+
+    def forward(self, inputs):
+
+        x = self._transform_inputs(inputs)
+
+        output = self.scale_heads[0](x[0])
+        for i in range(1, len(self.feature_strides)):
+            # non inplace
+            output = output + resize(
+                self.scale_heads[i](x[i]),
+                size=output.shape[2:],
+                mode='bilinear',
+                align_corners=self.align_corners)
+
+        output = self.cls_seg(output)
+        return output
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/gc_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/gc_head.py
new file mode 100644
index 0000000..eed5074
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/gc_head.py
@@ -0,0 +1,48 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+from mmcv.cnn import ContextBlock
+
+from ..builder import HEADS
+from .fcn_head import FCNHead
+
+
+@HEADS.register_module()
+class GCHead(FCNHead):
+    """GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond.
+
+    This head is the implementation of `GCNet
+    <https://arxiv.org/abs/1904.11492>`_.
+
+    Args:
+        ratio (float): Multiplier of channels ratio. Default: 1/4.
+        pooling_type (str): The pooling type of context aggregation.
+            Options are 'att', 'avg'. Default: 'att'.
+        fusion_types (tuple[str]): The fusion type for feature fusion.
+            Options are 'channel_add', 'channel_mul'. Default: ('channel_add',)
+    """
+
+    def __init__(self,
+                 ratio=1 / 4.,
+                 pooling_type='att',
+                 fusion_types=('channel_add', ),
+                 **kwargs):
+        super(GCHead, self).__init__(num_convs=2, **kwargs)
+        self.ratio = ratio
+        self.pooling_type = pooling_type
+        self.fusion_types = fusion_types
+        self.gc_block = ContextBlock(
+            in_channels=self.channels,
+            ratio=self.ratio,
+            pooling_type=self.pooling_type,
+            fusion_types=self.fusion_types)
+
+    def forward(self, inputs):
+        """Forward function."""
+        x = self._transform_inputs(inputs)
+        output = self.convs[0](x)
+        output = self.gc_block(output)
+        output = self.convs[1](output)
+        if self.concat_input:
+            output = self.conv_cat(torch.cat([x, output], dim=1))
+        output = self.cls_seg(output)
+        return output
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/isa_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/isa_head.py
new file mode 100644
index 0000000..0bf3455
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/isa_head.py
@@ -0,0 +1,143 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import math
+
+import torch
+import torch.nn.functional as F
+from mmcv.cnn import ConvModule
+
+from ..builder import HEADS
+from ..utils import SelfAttentionBlock as _SelfAttentionBlock
+from .decode_head import BaseDecodeHead
+
+
+class SelfAttentionBlock(_SelfAttentionBlock):
+    """Self-Attention Module.
+
+    Args:
+        in_channels (int): Input channels of key/query feature.
+        channels (int): Output channels of key/query transform.
+        conv_cfg (dict | None): Config of conv layers.
+        norm_cfg (dict | None): Config of norm layers.
+        act_cfg (dict | None): Config of activation layers.
+    """
+
+    def __init__(self, in_channels, channels, conv_cfg, norm_cfg, act_cfg):
+        super(SelfAttentionBlock, self).__init__(
+            key_in_channels=in_channels,
+            query_in_channels=in_channels,
+            channels=channels,
+            out_channels=in_channels,
+            share_key_query=False,
+            query_downsample=None,
+            key_downsample=None,
+            key_query_num_convs=2,
+            key_query_norm=True,
+            value_out_num_convs=1,
+            value_out_norm=False,
+            matmul_norm=True,
+            with_out=False,
+            conv_cfg=conv_cfg,
+            norm_cfg=norm_cfg,
+            act_cfg=act_cfg)
+
+        self.output_project = self.build_project(
+            in_channels,
+            in_channels,
+            num_convs=1,
+            use_conv_module=True,
+            conv_cfg=conv_cfg,
+            norm_cfg=norm_cfg,
+            act_cfg=act_cfg)
+
+    def forward(self, x):
+        """Forward function."""
+        context = super(SelfAttentionBlock, self).forward(x, x)
+        return self.output_project(context)
+
+
+@HEADS.register_module()
+class ISAHead(BaseDecodeHead):
+    """Interlaced Sparse Self-Attention for Semantic Segmentation.
+
+    This head is the implementation of `ISA
+    <https://arxiv.org/abs/1907.12273>`_.
+
+    Args:
+        isa_channels (int): The channels of ISA Module.
+        down_factor (tuple[int]): The local group size of ISA.
+    """
+
+    def __init__(self, isa_channels, down_factor=(8, 8), **kwargs):
+        super(ISAHead, self).__init__(**kwargs)
+        self.down_factor = down_factor
+
+        self.in_conv = ConvModule(
+            self.in_channels,
+            self.channels,
+            3,
+            padding=1,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+        self.global_relation = SelfAttentionBlock(
+            self.channels,
+            isa_channels,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+        self.local_relation = SelfAttentionBlock(
+            self.channels,
+            isa_channels,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+        self.out_conv = ConvModule(
+            self.channels * 2,
+            self.channels,
+            1,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+
+    def forward(self, inputs):
+        """Forward function."""
+        x_ = self._transform_inputs(inputs)
+        x = self.in_conv(x_)
+        residual = x
+
+        n, c, h, w = x.size()
+        loc_h, loc_w = self.down_factor  # size of local group in H- and W-axes
+        glb_h, glb_w = math.ceil(h / loc_h), math.ceil(w / loc_w)
+        pad_h, pad_w = glb_h * loc_h - h, glb_w * loc_w - w
+        if pad_h > 0 or pad_w > 0:  # pad if the size is not divisible
+            padding = (pad_w // 2, pad_w - pad_w // 2, pad_h // 2,
+                       pad_h - pad_h // 2)
+            x = F.pad(x, padding)
+
+        # global relation
+        x = x.view(n, c, glb_h, loc_h, glb_w, loc_w)
+        # do permutation to gather global group
+        x = x.permute(0, 3, 5, 1, 2, 4)  # (n, loc_h, loc_w, c, glb_h, glb_w)
+        x = x.reshape(-1, c, glb_h, glb_w)
+        # apply attention within each global group
+        x = self.global_relation(x)  # (n * loc_h * loc_w, c, glb_h, glb_w)
+
+        # local relation
+        x = x.view(n, loc_h, loc_w, c, glb_h, glb_w)
+        # do permutation to gather local group
+        x = x.permute(0, 4, 5, 3, 1, 2)  # (n, glb_h, glb_w, c, loc_h, loc_w)
+        x = x.reshape(-1, c, loc_h, loc_w)
+        # apply attention within each local group
+        x = self.local_relation(x)  # (n * glb_h * glb_w, c, loc_h, loc_w)
+
+        # permute each pixel back to its original position
+        x = x.view(n, glb_h, glb_w, c, loc_h, loc_w)
+        x = x.permute(0, 3, 1, 4, 2, 5)  # (n, c, glb_h, loc_h, glb_w, loc_w)
+        x = x.reshape(n, c, glb_h * loc_h, glb_w * loc_w)
+        if pad_h > 0 or pad_w > 0:  # remove padding
+            x = x[:, :, pad_h // 2:pad_h // 2 + h, pad_w // 2:pad_w // 2 + w]
+
+        x = self.out_conv(torch.cat([x, residual], dim=1))
+        out = self.cls_seg(x)
+
+        return out
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/knet_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/knet_head.py
new file mode 100644
index 0000000..f73dacc
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/knet_head.py
@@ -0,0 +1,453 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from mmcv.cnn import ConvModule, build_activation_layer, build_norm_layer
+from mmcv.cnn.bricks.transformer import (FFN, TRANSFORMER_LAYER,
+                                         MultiheadAttention,
+                                         build_transformer_layer)
+
+from mmseg.models.builder import HEADS, build_head
+from mmseg.models.decode_heads.decode_head import BaseDecodeHead
+from mmseg.utils import get_root_logger
+
+
+@TRANSFORMER_LAYER.register_module()
+class KernelUpdator(nn.Module):
+    """Dynamic Kernel Updator in Kernel Update Head.
+
+    Args:
+        in_channels (int): The number of channels of input feature map.
+            Default: 256.
+        feat_channels (int): The number of middle-stage channels in
+            the kernel updator. Default: 64.
+        out_channels (int): The number of output channels.
+        gate_sigmoid (bool): Whether use sigmoid function in gate
+            mechanism. Default: True.
+        gate_norm_act (bool): Whether add normalization and activation
+            layer in gate mechanism. Default: False.
+        activate_out: Whether add activation after gate mechanism.
+            Default: False.
+        norm_cfg (dict | None): Config of norm layers.
+            Default: dict(type='LN').
+        act_cfg (dict): Config of activation layers.
+            Default: dict(type='ReLU').
+ """ + + def __init__( + self, + in_channels=256, + feat_channels=64, + out_channels=None, + gate_sigmoid=True, + gate_norm_act=False, + activate_out=False, + norm_cfg=dict(type='LN'), + act_cfg=dict(type='ReLU', inplace=True), + ): + super(KernelUpdator, self).__init__() + self.in_channels = in_channels + self.feat_channels = feat_channels + self.out_channels_raw = out_channels + self.gate_sigmoid = gate_sigmoid + self.gate_norm_act = gate_norm_act + self.activate_out = activate_out + self.act_cfg = act_cfg + self.norm_cfg = norm_cfg + self.out_channels = out_channels if out_channels else in_channels + + self.num_params_in = self.feat_channels + self.num_params_out = self.feat_channels + self.dynamic_layer = nn.Linear( + self.in_channels, self.num_params_in + self.num_params_out) + self.input_layer = nn.Linear(self.in_channels, + self.num_params_in + self.num_params_out, + 1) + self.input_gate = nn.Linear(self.in_channels, self.feat_channels, 1) + self.update_gate = nn.Linear(self.in_channels, self.feat_channels, 1) + if self.gate_norm_act: + self.gate_norm = build_norm_layer(norm_cfg, self.feat_channels)[1] + + self.norm_in = build_norm_layer(norm_cfg, self.feat_channels)[1] + self.norm_out = build_norm_layer(norm_cfg, self.feat_channels)[1] + self.input_norm_in = build_norm_layer(norm_cfg, self.feat_channels)[1] + self.input_norm_out = build_norm_layer(norm_cfg, self.feat_channels)[1] + + self.activation = build_activation_layer(act_cfg) + + self.fc_layer = nn.Linear(self.feat_channels, self.out_channels, 1) + self.fc_norm = build_norm_layer(norm_cfg, self.out_channels)[1] + + def forward(self, update_feature, input_feature): + """Forward function of KernelUpdator. + + Args: + update_feature (torch.Tensor): Feature map assembled from + each group. It would be reshaped with last dimension + shape: `self.in_channels`. + input_feature (torch.Tensor): Intermediate feature + with shape: (N, num_classes, conv_kernel_size**2, channels). + Returns: + Tensor: The output tensor of shape (N*C1/C2, K*K, C2), where N is + the number of classes, C1 and C2 are the feature map channels of + KernelUpdateHead and KernelUpdator, respectively. + """ + + update_feature = update_feature.reshape(-1, self.in_channels) + num_proposals = update_feature.size(0) + # dynamic_layer works for + # phi_1 and psi_3 in Eq.(4) and (5) of K-Net paper + parameters = self.dynamic_layer(update_feature) + param_in = parameters[:, :self.num_params_in].view( + -1, self.feat_channels) + param_out = parameters[:, -self.num_params_out:].view( + -1, self.feat_channels) + + # input_layer works for + # phi_2 and psi_4 in Eq.(4) and (5) of K-Net paper + input_feats = self.input_layer( + input_feature.reshape(num_proposals, -1, self.feat_channels)) + input_in = input_feats[..., :self.num_params_in] + input_out = input_feats[..., -self.num_params_out:] + + # `gate_feats` is F^G in K-Net paper + gate_feats = input_in * param_in.unsqueeze(-2) + if self.gate_norm_act: + gate_feats = self.activation(self.gate_norm(gate_feats)) + + input_gate = self.input_norm_in(self.input_gate(gate_feats)) + update_gate = self.norm_in(self.update_gate(gate_feats)) + if self.gate_sigmoid: + input_gate = input_gate.sigmoid() + update_gate = update_gate.sigmoid() + param_out = self.norm_out(param_out) + input_out = self.input_norm_out(input_out) + + if self.activate_out: + param_out = self.activation(param_out) + input_out = self.activation(input_out) + + # Gate mechanism. Eq.(5) in original paper. 
+ # param_out has shape (batch_size, feat_channels, out_channels) + features = update_gate * param_out.unsqueeze( + -2) + input_gate * input_out + + features = self.fc_layer(features) + features = self.fc_norm(features) + features = self.activation(features) + + return features + + +@HEADS.register_module() +class KernelUpdateHead(nn.Module): + """Kernel Update Head in K-Net. + + Args: + num_classes (int): Number of classes. Default: 150. + num_ffn_fcs (int): The number of fully-connected layers in + FFNs. Default: 2. + num_heads (int): The number of parallel attention heads. + Default: 8. + num_mask_fcs (int): The number of fully connected layers for + mask prediction. Default: 3. + feedforward_channels (int): The hidden dimension of FFNs. + Defaults: 2048. + in_channels (int): The number of channels of input feature map. + Default: 256. + out_channels (int): The number of output channels. + Default: 256. + dropout (float): The Probability of an element to be + zeroed in MultiheadAttention and FFN. Default 0.0. + act_cfg (dict): Config of activation layers. + Default: dict(type='ReLU'). + ffn_act_cfg (dict): Config of activation layers in FFN. + Default: dict(type='ReLU'). + conv_kernel_size (int): The kernel size of convolution in + Kernel Update Head for dynamic kernel updation. + Default: 1. + feat_transform_cfg (dict | None): Config of feature transform. + Default: None. + kernel_init (bool): Whether initiate mask kernel in mask head. + Default: False. + with_ffn (bool): Whether add FFN in kernel update head. + Default: True. + feat_gather_stride (int): Stride of convolution in feature transform. + Default: 1. + mask_transform_stride (int): Stride of mask transform. + Default: 1. + kernel_updator_cfg (dict): Config of kernel updator. + Default: dict( + type='DynamicConv', + in_channels=256, + feat_channels=64, + out_channels=256, + act_cfg=dict(type='ReLU', inplace=True), + norm_cfg=dict(type='LN')). 
+ """ + + def __init__(self, + num_classes=150, + num_ffn_fcs=2, + num_heads=8, + num_mask_fcs=3, + feedforward_channels=2048, + in_channels=256, + out_channels=256, + dropout=0.0, + act_cfg=dict(type='ReLU', inplace=True), + ffn_act_cfg=dict(type='ReLU', inplace=True), + conv_kernel_size=1, + feat_transform_cfg=None, + kernel_init=False, + with_ffn=True, + feat_gather_stride=1, + mask_transform_stride=1, + kernel_updator_cfg=dict( + type='DynamicConv', + in_channels=256, + feat_channels=64, + out_channels=256, + act_cfg=dict(type='ReLU', inplace=True), + norm_cfg=dict(type='LN'))): + super(KernelUpdateHead, self).__init__() + self.num_classes = num_classes + self.in_channels = in_channels + self.out_channels = out_channels + self.fp16_enabled = False + self.dropout = dropout + self.num_heads = num_heads + self.kernel_init = kernel_init + self.with_ffn = with_ffn + self.conv_kernel_size = conv_kernel_size + self.feat_gather_stride = feat_gather_stride + self.mask_transform_stride = mask_transform_stride + + self.attention = MultiheadAttention(in_channels * conv_kernel_size**2, + num_heads, dropout) + self.attention_norm = build_norm_layer( + dict(type='LN'), in_channels * conv_kernel_size**2)[1] + self.kernel_update_conv = build_transformer_layer(kernel_updator_cfg) + + if feat_transform_cfg is not None: + kernel_size = feat_transform_cfg.pop('kernel_size', 1) + transform_channels = in_channels + self.feat_transform = ConvModule( + transform_channels, + in_channels, + kernel_size, + stride=feat_gather_stride, + padding=int(feat_gather_stride // 2), + **feat_transform_cfg) + else: + self.feat_transform = None + + if self.with_ffn: + self.ffn = FFN( + in_channels, + feedforward_channels, + num_ffn_fcs, + act_cfg=ffn_act_cfg, + dropout=dropout) + self.ffn_norm = build_norm_layer(dict(type='LN'), in_channels)[1] + + self.mask_fcs = nn.ModuleList() + for _ in range(num_mask_fcs): + self.mask_fcs.append( + nn.Linear(in_channels, in_channels, bias=False)) + self.mask_fcs.append( + build_norm_layer(dict(type='LN'), in_channels)[1]) + self.mask_fcs.append(build_activation_layer(act_cfg)) + + self.fc_mask = nn.Linear(in_channels, out_channels) + + def init_weights(self): + """Use xavier initialization for all weight parameter and set + classification head bias as a specific value when use focal loss.""" + for p in self.parameters(): + if p.dim() > 1: + nn.init.xavier_uniform_(p) + else: + # adopt the default initialization for + # the weight and bias of the layer norm + pass + if self.kernel_init: + logger = get_root_logger() + logger.info( + 'mask kernel in mask head is normal initialized by std 0.01') + nn.init.normal_(self.fc_mask.weight, mean=0, std=0.01) + + def forward(self, x, proposal_feat, mask_preds, mask_shape=None): + """Forward function of Dynamic Instance Interactive Head. + + Args: + x (Tensor): Feature map from FPN with shape + (batch_size, feature_dimensions, H , W). + proposal_feat (Tensor): Intermediate feature get from + diihead in last stage, has shape + (batch_size, num_proposals, feature_dimensions) + mask_preds (Tensor): mask prediction from the former stage in shape + (batch_size, num_proposals, H, W). + + Returns: + Tuple: The first tensor is predicted mask with shape + (N, num_classes, H, W), the second tensor is dynamic kernel + with shape (N, num_classes, channels, K, K). 
+ """ + N, num_proposals = proposal_feat.shape[:2] + if self.feat_transform is not None: + x = self.feat_transform(x) + + C, H, W = x.shape[-3:] + + mask_h, mask_w = mask_preds.shape[-2:] + if mask_h != H or mask_w != W: + gather_mask = F.interpolate( + mask_preds, (H, W), align_corners=False, mode='bilinear') + else: + gather_mask = mask_preds + + sigmoid_masks = gather_mask.softmax(dim=1) + + # Group Feature Assembling. Eq.(3) in original paper. + # einsum is faster than bmm by 30% + x_feat = torch.einsum('bnhw,bchw->bnc', sigmoid_masks, x) + + # obj_feat in shape [B, N, C, K, K] -> [B, N, C, K*K] -> [B, N, K*K, C] + proposal_feat = proposal_feat.reshape(N, num_proposals, + self.in_channels, + -1).permute(0, 1, 3, 2) + obj_feat = self.kernel_update_conv(x_feat, proposal_feat) + + # [B, N, K*K, C] -> [B, N, K*K*C] -> [N, B, K*K*C] + obj_feat = obj_feat.reshape(N, num_proposals, -1).permute(1, 0, 2) + obj_feat = self.attention_norm(self.attention(obj_feat)) + # [N, B, K*K*C] -> [B, N, K*K*C] + obj_feat = obj_feat.permute(1, 0, 2) + + # obj_feat in shape [B, N, K*K*C] -> [B, N, K*K, C] + obj_feat = obj_feat.reshape(N, num_proposals, -1, self.in_channels) + + # FFN + if self.with_ffn: + obj_feat = self.ffn_norm(self.ffn(obj_feat)) + + mask_feat = obj_feat + + for reg_layer in self.mask_fcs: + mask_feat = reg_layer(mask_feat) + + # [B, N, K*K, C] -> [B, N, C, K*K] + mask_feat = self.fc_mask(mask_feat).permute(0, 1, 3, 2) + + if (self.mask_transform_stride == 2 and self.feat_gather_stride == 1): + mask_x = F.interpolate( + x, scale_factor=0.5, mode='bilinear', align_corners=False) + H, W = mask_x.shape[-2:] + else: + mask_x = x + # group conv is 5x faster than unfold and uses about 1/5 memory + # Group conv vs. unfold vs. concat batch, 2.9ms :13.5ms :3.8ms + # Group conv vs. unfold vs. concat batch, 278 : 1420 : 369 + # but in real training group conv is slower than concat batch + # so we keep using concat batch. + # fold_x = F.unfold( + # mask_x, + # self.conv_kernel_size, + # padding=int(self.conv_kernel_size // 2)) + # mask_feat = mask_feat.reshape(N, num_proposals, -1) + # new_mask_preds = torch.einsum('bnc,bcl->bnl', mask_feat, fold_x) + # [B, N, C, K*K] -> [B*N, C, K, K] + mask_feat = mask_feat.reshape(N, num_proposals, C, + self.conv_kernel_size, + self.conv_kernel_size) + # [B, C, H, W] -> [1, B*C, H, W] + new_mask_preds = [] + for i in range(N): + new_mask_preds.append( + F.conv2d( + mask_x[i:i + 1], + mask_feat[i], + padding=int(self.conv_kernel_size // 2))) + + new_mask_preds = torch.cat(new_mask_preds, dim=0) + new_mask_preds = new_mask_preds.reshape(N, num_proposals, H, W) + if self.mask_transform_stride == 2: + new_mask_preds = F.interpolate( + new_mask_preds, + scale_factor=2, + mode='bilinear', + align_corners=False) + + if mask_shape is not None and mask_shape[0] != H: + new_mask_preds = F.interpolate( + new_mask_preds, + mask_shape, + align_corners=False, + mode='bilinear') + + return new_mask_preds, obj_feat.permute(0, 1, 3, 2).reshape( + N, num_proposals, self.in_channels, self.conv_kernel_size, + self.conv_kernel_size) + + +@HEADS.register_module() +class IterativeDecodeHead(BaseDecodeHead): + """K-Net: Towards Unified Image Segmentation. + + This head is the implementation of + `K-Net: `_. + + Args: + num_stages (int): The number of stages (kernel update heads) + in IterativeDecodeHead. Default: 3. + kernel_generate_head:(dict): Config of kernel generate head which + generate mask predictions, dynamic kernels and class predictions + for next kernel update heads. 
+ kernel_update_head (dict): Config of kernel update head which refine + dynamic kernels and class predictions iteratively. + + """ + + def __init__(self, num_stages, kernel_generate_head, kernel_update_head, + **kwargs): + super(BaseDecodeHead, self).__init__(**kwargs) + assert num_stages == len(kernel_update_head) + self.num_stages = num_stages + self.kernel_generate_head = build_head(kernel_generate_head) + self.kernel_update_head = nn.ModuleList() + self.align_corners = self.kernel_generate_head.align_corners + self.num_classes = self.kernel_generate_head.num_classes + self.input_transform = self.kernel_generate_head.input_transform + self.ignore_index = self.kernel_generate_head.ignore_index + + for head_cfg in kernel_update_head: + self.kernel_update_head.append(build_head(head_cfg)) + + def forward(self, inputs): + """Forward function.""" + feats = self.kernel_generate_head._forward_feature(inputs) + sem_seg = self.kernel_generate_head.cls_seg(feats) + seg_kernels = self.kernel_generate_head.conv_seg.weight.clone() + seg_kernels = seg_kernels[None].expand( + feats.size(0), *seg_kernels.size()) + + stage_segs = [sem_seg] + for i in range(self.num_stages): + sem_seg, seg_kernels = self.kernel_update_head[i](feats, + seg_kernels, + sem_seg) + stage_segs.append(sem_seg) + if self.training: + return stage_segs + # only return the prediction of the last stage during testing + return stage_segs[-1] + + def losses(self, seg_logit, seg_label): + losses = dict() + for i, logit in enumerate(seg_logit): + loss = self.kernel_generate_head.losses(logit, seg_label) + for k, v in loss.items(): + losses[f'{k}.s{i}'] = v + + return losses diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/lraspp_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/lraspp_head.py new file mode 100644 index 0000000..c10ff0d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/lraspp_head.py @@ -0,0 +1,91 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from mmcv import is_tuple_of +from mmcv.cnn import ConvModule + +from mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +@HEADS.register_module() +class LRASPPHead(BaseDecodeHead): + """Lite R-ASPP (LRASPP) head is proposed in Searching for MobileNetV3. + + This head is the improved implementation of `Searching for MobileNetV3 + `_. + + Args: + branch_channels (tuple[int]): The number of output channels in every + each branch. Default: (32, 64). + """ + + def __init__(self, branch_channels=(32, 64), **kwargs): + super(LRASPPHead, self).__init__(**kwargs) + if self.input_transform != 'multiple_select': + raise ValueError('in Lite R-ASPP (LRASPP) head, input_transform ' + f'must be \'multiple_select\'. 
But received ' + f'\'{self.input_transform}\'') + assert is_tuple_of(branch_channels, int) + assert len(branch_channels) == len(self.in_channels) - 1 + self.branch_channels = branch_channels + + self.convs = nn.Sequential() + self.conv_ups = nn.Sequential() + for i in range(len(branch_channels)): + self.convs.add_module( + f'conv{i}', + nn.Conv2d( + self.in_channels[i], branch_channels[i], 1, bias=False)) + self.conv_ups.add_module( + f'conv_up{i}', + ConvModule( + self.channels + branch_channels[i], + self.channels, + 1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + bias=False)) + + self.conv_up_input = nn.Conv2d(self.channels, self.channels, 1) + + self.aspp_conv = ConvModule( + self.in_channels[-1], + self.channels, + 1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + bias=False) + self.image_pool = nn.Sequential( + nn.AvgPool2d(kernel_size=49, stride=(16, 20)), + ConvModule( + self.in_channels[2], + self.channels, + 1, + act_cfg=dict(type='Sigmoid'), + bias=False)) + + def forward(self, inputs): + """Forward function.""" + inputs = self._transform_inputs(inputs) + + x = inputs[-1] + + x = self.aspp_conv(x) * resize( + self.image_pool(x), + size=x.size()[2:], + mode='bilinear', + align_corners=self.align_corners) + x = self.conv_up_input(x) + + for i in range(len(self.branch_channels) - 1, -1, -1): + x = resize( + x, + size=inputs[i].size()[2:], + mode='bilinear', + align_corners=self.align_corners) + x = torch.cat([x, self.convs[i](inputs[i])], 1) + x = self.conv_ups[i](x) + + return self.cls_seg(x) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/nl_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/nl_head.py new file mode 100644 index 0000000..637517e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/nl_head.py @@ -0,0 +1,50 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from mmcv.cnn import NonLocal2d + +from ..builder import HEADS +from .fcn_head import FCNHead + + +@HEADS.register_module() +class NLHead(FCNHead): + """Non-local Neural Networks. + + This head is the implementation of `NLNet + `_. + + Args: + reduction (int): Reduction factor of projection transform. Default: 2. + use_scale (bool): Whether to scale pairwise_weight by + sqrt(1/inter_channels). Default: True. + mode (str): The nonlocal mode. Options are 'embedded_gaussian', + 'dot_product'. Default: 'embedded_gaussian.'. 
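+
+    Example:
+        A construction sketch (editor's illustration; the channel and
+        class values are placeholders):
+
+        >>> head = NLHead(in_channels=2048, in_index=3, channels=512,
+        ...               num_classes=19, reduction=2, use_scale=True)
+        >>> # the NonLocal2d block runs between the two FCN convs, so the
+        >>> # pairwise attention acts on `channels`-dimensional features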
+ """ + + def __init__(self, + reduction=2, + use_scale=True, + mode='embedded_gaussian', + **kwargs): + super(NLHead, self).__init__(num_convs=2, **kwargs) + self.reduction = reduction + self.use_scale = use_scale + self.mode = mode + self.nl_block = NonLocal2d( + in_channels=self.channels, + reduction=self.reduction, + use_scale=self.use_scale, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + mode=self.mode) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + output = self.convs[0](x) + output = self.nl_block(output) + output = self.convs[1](output) + if self.concat_input: + output = self.conv_cat(torch.cat([x, output], dim=1)) + output = self.cls_seg(output) + return output diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/ocr_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/ocr_head.py new file mode 100644 index 0000000..09eadfb --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/ocr_head.py @@ -0,0 +1,128 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import ConvModule + +from mmseg.ops import resize +from ..builder import HEADS +from ..utils import SelfAttentionBlock as _SelfAttentionBlock +from .cascade_decode_head import BaseCascadeDecodeHead + + +class SpatialGatherModule(nn.Module): + """Aggregate the context features according to the initial predicted + probability distribution. + + Employ the soft-weighted method to aggregate the context. + """ + + def __init__(self, scale): + super(SpatialGatherModule, self).__init__() + self.scale = scale + + def forward(self, feats, probs): + """Forward function.""" + batch_size, num_classes, height, width = probs.size() + channels = feats.size(1) + probs = probs.view(batch_size, num_classes, -1) + feats = feats.view(batch_size, channels, -1) + # [batch_size, height*width, num_classes] + feats = feats.permute(0, 2, 1) + # [batch_size, channels, height*width] + probs = F.softmax(self.scale * probs, dim=2) + # [batch_size, channels, num_classes] + ocr_context = torch.matmul(probs, feats) + ocr_context = ocr_context.permute(0, 2, 1).contiguous().unsqueeze(3) + return ocr_context + + +class ObjectAttentionBlock(_SelfAttentionBlock): + """Make a OCR used SelfAttentionBlock.""" + + def __init__(self, in_channels, channels, scale, conv_cfg, norm_cfg, + act_cfg): + if scale > 1: + query_downsample = nn.MaxPool2d(kernel_size=scale) + else: + query_downsample = None + super(ObjectAttentionBlock, self).__init__( + key_in_channels=in_channels, + query_in_channels=in_channels, + channels=channels, + out_channels=in_channels, + share_key_query=False, + query_downsample=query_downsample, + key_downsample=None, + key_query_num_convs=2, + key_query_norm=True, + value_out_num_convs=1, + value_out_norm=True, + matmul_norm=True, + with_out=True, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.bottleneck = ConvModule( + in_channels * 2, + in_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, query_feats, key_feats): + """Forward function.""" + context = super(ObjectAttentionBlock, + self).forward(query_feats, key_feats) + output = self.bottleneck(torch.cat([context, query_feats], dim=1)) + if self.query_downsample is not None: + output = resize(query_feats) + + return output + + +@HEADS.register_module() +class 
OCRHead(BaseCascadeDecodeHead):
+    """Object-Contextual Representations for Semantic Segmentation.
+
+    This head is the implementation of `OCRNet
+    <https://arxiv.org/abs/1909.11065>`_.
+
+    Args:
+        ocr_channels (int): The intermediate channels of OCR block.
+        scale (int): The scale of probability map in SpatialGatherModule.
+            Default: 1.
+    """
+
+    def __init__(self, ocr_channels, scale=1, **kwargs):
+        super(OCRHead, self).__init__(**kwargs)
+        self.ocr_channels = ocr_channels
+        self.scale = scale
+        self.object_context_block = ObjectAttentionBlock(
+            self.channels,
+            self.ocr_channels,
+            self.scale,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+        self.spatial_gather_module = SpatialGatherModule(self.scale)
+
+        self.bottleneck = ConvModule(
+            self.in_channels,
+            self.channels,
+            3,
+            padding=1,
+            conv_cfg=self.conv_cfg,
+            norm_cfg=self.norm_cfg,
+            act_cfg=self.act_cfg)
+
+    def forward(self, inputs, prev_output):
+        """Forward function."""
+        x = self._transform_inputs(inputs)
+        feats = self.bottleneck(x)
+        context = self.spatial_gather_module(feats, prev_output)
+        object_context = self.object_context_block(feats, context)
+        output = self.cls_seg(object_context)
+
+        return output
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/point_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/point_head.py
new file mode 100644
index 0000000..5e60527
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/point_head.py
@@ -0,0 +1,364 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# Modified from https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend/point_head/point_head.py  # noqa
+
+import torch
+import torch.nn as nn
+from mmcv.cnn import ConvModule
+
+try:
+    from mmcv.ops import point_sample
+except ModuleNotFoundError:
+    point_sample = None
+
+from mmseg.models.builder import HEADS
+from mmseg.ops import resize
+from ..losses import accuracy
+from .cascade_decode_head import BaseCascadeDecodeHead
+
+
+def calculate_uncertainty(seg_logits):
+    """Estimate uncertainty based on seg logits.
+
+    For each location of the prediction ``seg_logits`` we estimate
+    uncertainty as the difference between top first and top second
+    predicted logits.
+
+    Args:
+        seg_logits (Tensor): Semantic segmentation logits,
+            shape (batch_size, num_classes, height, width).
+
+    Returns:
+        scores (Tensor): Uncertainty scores with the most uncertain
+            locations having the highest uncertainty score, shape (
+            batch_size, 1, height, width)
+    """
+    top2_scores = torch.topk(seg_logits, k=2, dim=1)[0]
+    return (top2_scores[:, 1] - top2_scores[:, 0]).unsqueeze(1)
+
+
+@HEADS.register_module()
+class PointHead(BaseCascadeDecodeHead):
+    """A mask point head used in PointRend.
+
+    This head is the implementation of `PointRend: Image Segmentation as
+    Rendering <https://arxiv.org/abs/1912.08193>`_.
+    ``PointHead`` uses a shared multi-layer perceptron (equivalent to
+    nn.Conv1d) to predict the logit of input points. The fine-grained feature
+    and coarse feature will be concatenated together for prediction.
+
+    Args:
+        num_fcs (int): Number of fc layers in the head. Default: 3.
+        in_channels (int): Number of input channels. Default: 256.
+        fc_channels (int): Number of fc channels. Default: 256.
+        num_classes (int): Number of classes for logits. Default: 80.
+        class_agnostic (bool): Whether use class agnostic classification.
+            If so, the output channels of logits will be 1. Default: False.
+ coarse_pred_each_layer (bool): Whether concatenate coarse feature with + the output of each fc layer. Default: True. + conv_cfg (dict|None): Dictionary to construct and config conv layer. + Default: dict(type='Conv1d')) + norm_cfg (dict|None): Dictionary to construct and config norm layer. + Default: None. + loss_point (dict): Dictionary to construct and config loss layer of + point head. Default: dict(type='CrossEntropyLoss', use_mask=True, + loss_weight=1.0). + """ + + def __init__(self, + num_fcs=3, + coarse_pred_each_layer=True, + conv_cfg=dict(type='Conv1d'), + norm_cfg=None, + act_cfg=dict(type='ReLU', inplace=False), + **kwargs): + super(PointHead, self).__init__( + input_transform='multiple_select', + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + init_cfg=dict( + type='Normal', std=0.01, override=dict(name='fc_seg')), + **kwargs) + if point_sample is None: + raise RuntimeError('Please install mmcv-full for ' + 'point_sample ops') + + self.num_fcs = num_fcs + self.coarse_pred_each_layer = coarse_pred_each_layer + + fc_in_channels = sum(self.in_channels) + self.num_classes + fc_channels = self.channels + self.fcs = nn.ModuleList() + for k in range(num_fcs): + fc = ConvModule( + fc_in_channels, + fc_channels, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.fcs.append(fc) + fc_in_channels = fc_channels + fc_in_channels += self.num_classes if self.coarse_pred_each_layer \ + else 0 + self.fc_seg = nn.Conv1d( + fc_in_channels, + self.num_classes, + kernel_size=1, + stride=1, + padding=0) + if self.dropout_ratio > 0: + self.dropout = nn.Dropout(self.dropout_ratio) + delattr(self, 'conv_seg') + + def cls_seg(self, feat): + """Classify each pixel with fc.""" + if self.dropout is not None: + feat = self.dropout(feat) + output = self.fc_seg(feat) + return output + + def forward(self, fine_grained_point_feats, coarse_point_feats): + x = torch.cat([fine_grained_point_feats, coarse_point_feats], dim=1) + for fc in self.fcs: + x = fc(x) + if self.coarse_pred_each_layer: + x = torch.cat((x, coarse_point_feats), dim=1) + return self.cls_seg(x) + + def _get_fine_grained_point_feats(self, x, points): + """Sample from fine grained features. + + Args: + x (list[Tensor]): Feature pyramid from by neck or backbone. + points (Tensor): Point coordinates, shape (batch_size, + num_points, 2). + + Returns: + fine_grained_feats (Tensor): Sampled fine grained feature, + shape (batch_size, sum(channels of x), num_points). + """ + + fine_grained_feats_list = [ + point_sample(_, points, align_corners=self.align_corners) + for _ in x + ] + if len(fine_grained_feats_list) > 1: + fine_grained_feats = torch.cat(fine_grained_feats_list, dim=1) + else: + fine_grained_feats = fine_grained_feats_list[0] + + return fine_grained_feats + + def _get_coarse_point_feats(self, prev_output, points): + """Sample from fine grained features. + + Args: + prev_output (list[Tensor]): Prediction of previous decode head. + points (Tensor): Point coordinates, shape (batch_size, + num_points, 2). + + Returns: + coarse_feats (Tensor): Sampled coarse feature, shape (batch_size, + num_classes, num_points). + """ + + coarse_feats = point_sample( + prev_output, points, align_corners=self.align_corners) + + return coarse_feats + + def forward_train(self, inputs, prev_output, img_metas, gt_semantic_seg, + train_cfg): + """Forward function for training. + Args: + inputs (list[Tensor]): List of multi-level img features. 
+ prev_output (Tensor): The output of previous decode head. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + gt_semantic_seg (Tensor): Semantic segmentation masks + used if the architecture supports semantic segmentation task. + train_cfg (dict): The training config. + + Returns: + dict[str, Tensor]: a dictionary of loss components + """ + x = self._transform_inputs(inputs) + with torch.no_grad(): + points = self.get_points_train( + prev_output, calculate_uncertainty, cfg=train_cfg) + fine_grained_point_feats = self._get_fine_grained_point_feats( + x, points) + coarse_point_feats = self._get_coarse_point_feats(prev_output, points) + point_logits = self.forward(fine_grained_point_feats, + coarse_point_feats) + point_label = point_sample( + gt_semantic_seg.float(), + points, + mode='nearest', + align_corners=self.align_corners) + point_label = point_label.squeeze(1).long() + + losses = self.losses(point_logits, point_label) + + return losses + + def forward_test(self, inputs, prev_output, img_metas, test_cfg): + """Forward function for testing. + + Args: + inputs (list[Tensor]): List of multi-level img features. + prev_output (Tensor): The output of previous decode head. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + test_cfg (dict): The testing config. + + Returns: + Tensor: Output segmentation map. + """ + + x = self._transform_inputs(inputs) + refined_seg_logits = prev_output.clone() + for _ in range(test_cfg.subdivision_steps): + refined_seg_logits = resize( + refined_seg_logits, + scale_factor=test_cfg.scale_factor, + mode='bilinear', + align_corners=self.align_corners) + batch_size, channels, height, width = refined_seg_logits.shape + point_indices, points = self.get_points_test( + refined_seg_logits, calculate_uncertainty, cfg=test_cfg) + fine_grained_point_feats = self._get_fine_grained_point_feats( + x, points) + coarse_point_feats = self._get_coarse_point_feats( + prev_output, points) + point_logits = self.forward(fine_grained_point_feats, + coarse_point_feats) + + point_indices = point_indices.unsqueeze(1).expand(-1, channels, -1) + refined_seg_logits = refined_seg_logits.reshape( + batch_size, channels, height * width) + refined_seg_logits = refined_seg_logits.scatter_( + 2, point_indices, point_logits) + refined_seg_logits = refined_seg_logits.view( + batch_size, channels, height, width) + + return refined_seg_logits + + def losses(self, point_logits, point_label): + """Compute segmentation loss.""" + loss = dict() + if not isinstance(self.loss_decode, nn.ModuleList): + losses_decode = [self.loss_decode] + else: + losses_decode = self.loss_decode + for loss_module in losses_decode: + loss['point' + loss_module.loss_name] = loss_module( + point_logits, point_label, ignore_index=self.ignore_index) + + loss['acc_point'] = accuracy( + point_logits, point_label, ignore_index=self.ignore_index) + return loss + + def get_points_train(self, seg_logits, uncertainty_func, cfg): + """Sample points for training. + + Sample points in [0, 1] x [0, 1] coordinate space based on their + uncertainty. 
The uncertainties are calculated for each point using + 'uncertainty_func' function that takes point's logit prediction as + input. + + Args: + seg_logits (Tensor): Semantic segmentation logits, shape ( + batch_size, num_classes, height, width). + uncertainty_func (func): uncertainty calculation function. + cfg (dict): Training config of point head. + + Returns: + point_coords (Tensor): A tensor of shape (batch_size, num_points, + 2) that contains the coordinates of ``num_points`` sampled + points. + """ + num_points = cfg.num_points + oversample_ratio = cfg.oversample_ratio + importance_sample_ratio = cfg.importance_sample_ratio + assert oversample_ratio >= 1 + assert 0 <= importance_sample_ratio <= 1 + batch_size = seg_logits.shape[0] + num_sampled = int(num_points * oversample_ratio) + point_coords = torch.rand( + batch_size, num_sampled, 2, device=seg_logits.device) + point_logits = point_sample(seg_logits, point_coords) + # It is crucial to calculate uncertainty based on the sampled + # prediction value for the points. Calculating uncertainties of the + # coarse predictions first and sampling them for points leads to + # incorrect results. To illustrate this: assume uncertainty func( + # logits)=-abs(logits), a sampled point between two coarse + # predictions with -1 and 1 logits has 0 logits, and therefore 0 + # uncertainty value. However, if we calculate uncertainties for the + # coarse predictions first, both will have -1 uncertainty, + # and sampled point will get -1 uncertainty. + point_uncertainties = uncertainty_func(point_logits) + num_uncertain_points = int(importance_sample_ratio * num_points) + num_random_points = num_points - num_uncertain_points + idx = torch.topk( + point_uncertainties[:, 0, :], k=num_uncertain_points, dim=1)[1] + shift = num_sampled * torch.arange( + batch_size, dtype=torch.long, device=seg_logits.device) + idx += shift[:, None] + point_coords = point_coords.view(-1, 2)[idx.view(-1), :].view( + batch_size, num_uncertain_points, 2) + if num_random_points > 0: + rand_point_coords = torch.rand( + batch_size, num_random_points, 2, device=seg_logits.device) + point_coords = torch.cat((point_coords, rand_point_coords), dim=1) + return point_coords + + def get_points_test(self, seg_logits, uncertainty_func, cfg): + """Sample points for testing. + + Find ``num_points`` most uncertain points from ``uncertainty_map``. + + Args: + seg_logits (Tensor): A tensor of shape (batch_size, num_classes, + height, width) for class-specific or class-agnostic prediction. + uncertainty_func (func): uncertainty calculation function. + cfg (dict): Testing config of point head. + + Returns: + point_indices (Tensor): A tensor of shape (batch_size, num_points) + that contains indices from [0, height x width) of the most + uncertain points. + point_coords (Tensor): A tensor of shape (batch_size, num_points, + 2) that contains [0, 1] x [0, 1] normalized coordinates of the + most uncertain points from the ``height x width`` grid . 
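+
+        Example:
+            Index-to-coordinate conversion on a 4x4 grid (editor's worked
+            example): ``h_step = w_step = 1/4``, so flat index 5 (row 1,
+            col 1) maps to ``(0.125 + 1 * 0.25, 0.125 + 1 * 0.25) =
+            (0.375, 0.375)``, the centre of that cell in the normalized
+            [0, 1] x [0, 1] space.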
+ """ + + num_points = cfg.subdivision_num_points + uncertainty_map = uncertainty_func(seg_logits) + batch_size, _, height, width = uncertainty_map.shape + h_step = 1.0 / height + w_step = 1.0 / width + + uncertainty_map = uncertainty_map.view(batch_size, height * width) + num_points = min(height * width, num_points) + point_indices = uncertainty_map.topk(num_points, dim=1)[1] + point_coords = torch.zeros( + batch_size, + num_points, + 2, + dtype=torch.float, + device=seg_logits.device) + point_coords[:, :, 0] = w_step / 2.0 + (point_indices % + width).float() * w_step + point_coords[:, :, 1] = h_step / 2.0 + (point_indices // + width).float() * h_step + return point_indices, point_coords diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/psa_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/psa_head.py new file mode 100644 index 0000000..df7593c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/psa_head.py @@ -0,0 +1,197 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import ConvModule + +from mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead + +try: + from mmcv.ops import PSAMask +except ModuleNotFoundError: + PSAMask = None + + +@HEADS.register_module() +class PSAHead(BaseDecodeHead): + """Point-wise Spatial Attention Network for Scene Parsing. + + This head is the implementation of `PSANet + `_. + + Args: + mask_size (tuple[int]): The PSA mask size. It usually equals input + size. + psa_type (str): The type of psa module. Options are 'collect', + 'distribute', 'bi-direction'. Default: 'bi-direction' + compact (bool): Whether use compact map for 'collect' mode. + Default: True. + shrink_factor (int): The downsample factors of psa mask. Default: 2. + normalization_factor (float): The normalize factor of attention. + psa_softmax (bool): Whether use softmax for attention. 
+ """ + + def __init__(self, + mask_size, + psa_type='bi-direction', + compact=False, + shrink_factor=2, + normalization_factor=1.0, + psa_softmax=True, + **kwargs): + if PSAMask is None: + raise RuntimeError('Please install mmcv-full for PSAMask ops') + super(PSAHead, self).__init__(**kwargs) + assert psa_type in ['collect', 'distribute', 'bi-direction'] + self.psa_type = psa_type + self.compact = compact + self.shrink_factor = shrink_factor + self.mask_size = mask_size + mask_h, mask_w = mask_size + self.psa_softmax = psa_softmax + if normalization_factor is None: + normalization_factor = mask_h * mask_w + self.normalization_factor = normalization_factor + + self.reduce = ConvModule( + self.in_channels, + self.channels, + kernel_size=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.attention = nn.Sequential( + ConvModule( + self.channels, + self.channels, + kernel_size=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + nn.Conv2d( + self.channels, mask_h * mask_w, kernel_size=1, bias=False)) + if psa_type == 'bi-direction': + self.reduce_p = ConvModule( + self.in_channels, + self.channels, + kernel_size=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.attention_p = nn.Sequential( + ConvModule( + self.channels, + self.channels, + kernel_size=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + nn.Conv2d( + self.channels, mask_h * mask_w, kernel_size=1, bias=False)) + self.psamask_collect = PSAMask('collect', mask_size) + self.psamask_distribute = PSAMask('distribute', mask_size) + else: + self.psamask = PSAMask(psa_type, mask_size) + self.proj = ConvModule( + self.channels * (2 if psa_type == 'bi-direction' else 1), + self.in_channels, + kernel_size=1, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + self.bottleneck = ConvModule( + self.in_channels * 2, + self.channels, + kernel_size=3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + identity = x + align_corners = self.align_corners + if self.psa_type in ['collect', 'distribute']: + out = self.reduce(x) + n, c, h, w = out.size() + if self.shrink_factor != 1: + if h % self.shrink_factor and w % self.shrink_factor: + h = (h - 1) // self.shrink_factor + 1 + w = (w - 1) // self.shrink_factor + 1 + align_corners = True + else: + h = h // self.shrink_factor + w = w // self.shrink_factor + align_corners = False + out = resize( + out, + size=(h, w), + mode='bilinear', + align_corners=align_corners) + y = self.attention(out) + if self.compact: + if self.psa_type == 'collect': + y = y.view(n, h * w, + h * w).transpose(1, 2).view(n, h * w, h, w) + else: + y = self.psamask(y) + if self.psa_softmax: + y = F.softmax(y, dim=1) + out = torch.bmm( + out.view(n, c, h * w), y.view(n, h * w, h * w)).view( + n, c, h, w) * (1.0 / self.normalization_factor) + else: + x_col = self.reduce(x) + x_dis = self.reduce_p(x) + n, c, h, w = x_col.size() + if self.shrink_factor != 1: + if h % self.shrink_factor and w % self.shrink_factor: + h = (h - 1) // self.shrink_factor + 1 + w = (w - 1) // self.shrink_factor + 1 + align_corners = True + else: + h = h // self.shrink_factor + w = w // self.shrink_factor + align_corners = False + x_col = resize( + x_col, + size=(h, w), + mode='bilinear', + align_corners=align_corners) + x_dis = resize( + x_dis, + size=(h, w), + 
mode='bilinear', + align_corners=align_corners) + y_col = self.attention(x_col) + y_dis = self.attention_p(x_dis) + if self.compact: + y_dis = y_dis.view(n, h * w, + h * w).transpose(1, 2).view(n, h * w, h, w) + else: + y_col = self.psamask_collect(y_col) + y_dis = self.psamask_distribute(y_dis) + if self.psa_softmax: + y_col = F.softmax(y_col, dim=1) + y_dis = F.softmax(y_dis, dim=1) + x_col = torch.bmm( + x_col.view(n, c, h * w), y_col.view(n, h * w, h * w)).view( + n, c, h, w) * (1.0 / self.normalization_factor) + x_dis = torch.bmm( + x_dis.view(n, c, h * w), y_dis.view(n, h * w, h * w)).view( + n, c, h, w) * (1.0 / self.normalization_factor) + out = torch.cat([x_col, x_dis], 1) + out = self.proj(out) + out = resize( + out, + size=identity.shape[2:], + mode='bilinear', + align_corners=align_corners) + out = self.bottleneck(torch.cat((identity, out), dim=1)) + out = self.cls_seg(out) + return out diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/psp_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/psp_head.py new file mode 100644 index 0000000..6990676 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/psp_head.py @@ -0,0 +1,117 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule + +from mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +class PPM(nn.ModuleList): + """Pooling Pyramid Module used in PSPNet. + + Args: + pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module. + in_channels (int): Input channels. + channels (int): Channels after modules, before conv_seg. + conv_cfg (dict|None): Config of conv layers. + norm_cfg (dict|None): Config of norm layers. + act_cfg (dict): Config of activation layers. + align_corners (bool): align_corners argument of F.interpolate. + """ + + def __init__(self, pool_scales, in_channels, channels, conv_cfg, norm_cfg, + act_cfg, align_corners, **kwargs): + super(PPM, self).__init__() + self.pool_scales = pool_scales + self.align_corners = align_corners + self.in_channels = in_channels + self.channels = channels + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + for pool_scale in pool_scales: + self.append( + nn.Sequential( + nn.AdaptiveAvgPool2d(pool_scale), + ConvModule( + self.in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + **kwargs))) + + def forward(self, x): + """Forward function.""" + ppm_outs = [] + for ppm in self: + ppm_out = ppm(x) + upsampled_ppm_out = resize( + ppm_out, + size=x.size()[2:], + mode='bilinear', + align_corners=self.align_corners) + ppm_outs.append(upsampled_ppm_out) + return ppm_outs + + +@HEADS.register_module() +class PSPHead(BaseDecodeHead): + """Pyramid Scene Parsing Network. + + This head is the implementation of + `PSPNet `_. + + Args: + pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module. Default: (1, 2, 3, 6). 
+ """ + + def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): + super(PSPHead, self).__init__(**kwargs) + assert isinstance(pool_scales, (list, tuple)) + self.pool_scales = pool_scales + self.psp_modules = PPM( + self.pool_scales, + self.in_channels, + self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=self.align_corners) + self.bottleneck = ConvModule( + self.in_channels + len(pool_scales) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def _forward_feature(self, inputs): + """Forward function for feature maps before classifying each pixel with + ``self.cls_seg`` fc. + + Args: + inputs (list[Tensor]): List of multi-level img features. + + Returns: + feats (Tensor): A tensor of shape (batch_size, self.channels, + H, W) which is feature map for last layer of decoder head. + """ + x = self._transform_inputs(inputs) + psp_outs = [x] + psp_outs.extend(self.psp_modules(x)) + psp_outs = torch.cat(psp_outs, dim=1) + feats = self.bottleneck(psp_outs) + return feats + + def forward(self, inputs): + """Forward function.""" + output = self._forward_feature(inputs) + output = self.cls_seg(output) + return output diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/segformer_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/segformer_head.py new file mode 100644 index 0000000..d6e172e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/segformer_head.py @@ -0,0 +1,144 @@ +# Modified from +# https://github.com/NVlabs/SegFormer/blob/master/mmseg/models/decode_heads/segformer_head.py +# +# This work is licensed under the NVIDIA Source Code License. +# +# Copyright (c) 2021, NVIDIA Corporation. All rights reserved. +# NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator +# Augmentation (ADA) +# +# 1. Definitions +# "Licensor" means any person or entity that distributes its Work. +# "Software" means the original work of authorship made available under +# this License. +# "Work" means the Software and any additions to or derivative works of +# the Software that are made available under this License. +# The terms "reproduce," "reproduction," "derivative works," and +# "distribution" have the meaning as provided under U.S. copyright law; +# provided, however, that for the purposes of this License, derivative +# works shall not include works that remain separable from, or merely +# link (or bind by name) to the interfaces of, the Work. +# Works, including the Software, are "made available" under this License +# by including in or with the Work either (a) a copyright notice +# referencing the applicability of this License to the Work, or (b) a +# copy of this License. +# 2. License Grants +# 2.1 Copyright Grant. Subject to the terms and conditions of this +# License, each Licensor grants to you a perpetual, worldwide, +# non-exclusive, royalty-free, copyright license to reproduce, +# prepare derivative works of, publicly display, publicly perform, +# sublicense and distribute its Work and any resulting derivative +# works in any form. +# 3. Limitations +# 3.1 Redistribution. 
You may reproduce or distribute the Work only +# if (a) you do so under this License, (b) you include a complete +# copy of this License with your distribution, and (c) you retain +# without modification any copyright, patent, trademark, or +# attribution notices that are present in the Work. +# 3.2 Derivative Works. You may specify that additional or different +# terms apply to the use, reproduction, and distribution of your +# derivative works of the Work ("Your Terms") only if (a) Your Terms +# provide that the use limitation in Section 3.3 applies to your +# derivative works, and (b) you identify the specific derivative +# works that are subject to Your Terms. Notwithstanding Your Terms, +# this License (including the redistribution requirements in Section +# 3.1) will continue to apply to the Work itself. +# 3.3 Use Limitation. The Work and any derivative works thereof only +# may be used or intended for use non-commercially. Notwithstanding +# the foregoing, NVIDIA and its affiliates may use the Work and any +# derivative works commercially. As used herein, "non-commercially" +# means for research or evaluation purposes only. +# 3.4 Patent Claims. If you bring or threaten to bring a patent claim +# against any Licensor (including any claim, cross-claim or +# counterclaim in a lawsuit) to enforce any patents that you allege +# are infringed by any Work, then your rights under this License from +# such Licensor (including the grant in Section 2.1) will terminate +# immediately. +# 3.5 Trademarks. This License does not grant any rights to use any +# Licensor’s or its affiliates’ names, logos, or trademarks, except +# as necessary to reproduce the notices described in this License. +# 3.6 Termination. If you violate any term of this License, then your +# rights under this License (including the grant in Section 2.1) will +# terminate immediately. +# 4. Disclaimer of Warranty. +# THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR +# NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER +# THIS LICENSE. +# 5. Limitation of Liability. +# EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL +# THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE +# SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, +# INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF +# OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK +# (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, +# LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER +# COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGES. + +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule + +from mmseg.models.builder import HEADS +from mmseg.models.decode_heads.decode_head import BaseDecodeHead +from mmseg.ops import resize + + +@HEADS.register_module() +class SegformerHead(BaseDecodeHead): + """The all mlp Head of segformer. + + This head is the implementation of + `Segformer ` _. + + Args: + interpolate_mode: The interpolate mode of MLP head upsample operation. + Default: 'bilinear'. 
+ """ + + def __init__(self, interpolate_mode='bilinear', **kwargs): + super().__init__(input_transform='multiple_select', **kwargs) + + self.interpolate_mode = interpolate_mode + num_inputs = len(self.in_channels) + + assert num_inputs == len(self.in_index) + + self.convs = nn.ModuleList() + for i in range(num_inputs): + self.convs.append( + ConvModule( + in_channels=self.in_channels[i], + out_channels=self.channels, + kernel_size=1, + stride=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + + self.fusion_conv = ConvModule( + in_channels=self.channels * num_inputs, + out_channels=self.channels, + kernel_size=1, + norm_cfg=self.norm_cfg) + + def forward(self, inputs): + # Receive 4 stage backbone feature map: 1/4, 1/8, 1/16, 1/32 + inputs = self._transform_inputs(inputs) + outs = [] + for idx in range(len(inputs)): + x = inputs[idx] + conv = self.convs[idx] + outs.append( + resize( + input=conv(x), + size=inputs[0].shape[2:], + mode=self.interpolate_mode, + align_corners=self.align_corners)) + + out = self.fusion_conv(torch.cat(outs, dim=1)) + + out = self.cls_seg(out) + + return out diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/segmenter_mask_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/segmenter_mask_head.py new file mode 100644 index 0000000..6a9b3d4 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/segmenter_mask_head.py @@ -0,0 +1,133 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import build_norm_layer +from mmcv.cnn.utils.weight_init import (constant_init, trunc_normal_, + trunc_normal_init) +from mmcv.runner import ModuleList + +from mmseg.models.backbones.vit import TransformerEncoderLayer +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +@HEADS.register_module() +class SegmenterMaskTransformerHead(BaseDecodeHead): + """Segmenter: Transformer for Semantic Segmentation. + + This head is the implementation of + `Segmenter: `_. + + Args: + backbone_cfg:(dict): Config of backbone of + Context Path. + in_channels (int): The number of channels of input image. + num_layers (int): The depth of transformer. + num_heads (int): The number of attention heads. + embed_dims (int): The number of embedding dimension. + mlp_ratio (int): ratio of mlp hidden dim to embedding dim. + Default: 4. + drop_path_rate (float): stochastic depth rate. Default 0.1. + drop_rate (float): Probability of an element to be zeroed. + Default 0.0 + attn_drop_rate (float): The drop out rate for attention layer. + Default 0.0 + num_fcs (int): The number of fully-connected layers for FFNs. + Default: 2. + qkv_bias (bool): Enable bias for qkv if True. Default: True. + act_cfg (dict): The activation config for FFNs. + Default: dict(type='GELU'). + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='LN') + init_std (float): The value of std in weight initialization. + Default: 0.02. 
+ """ + + def __init__( + self, + in_channels, + num_layers, + num_heads, + embed_dims, + mlp_ratio=4, + drop_path_rate=0.1, + drop_rate=0.0, + attn_drop_rate=0.0, + num_fcs=2, + qkv_bias=True, + act_cfg=dict(type='GELU'), + norm_cfg=dict(type='LN'), + init_std=0.02, + **kwargs, + ): + super(SegmenterMaskTransformerHead, self).__init__( + in_channels=in_channels, **kwargs) + + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, num_layers)] + self.layers = ModuleList() + for i in range(num_layers): + self.layers.append( + TransformerEncoderLayer( + embed_dims=embed_dims, + num_heads=num_heads, + feedforward_channels=mlp_ratio * embed_dims, + attn_drop_rate=attn_drop_rate, + drop_rate=drop_rate, + drop_path_rate=dpr[i], + num_fcs=num_fcs, + qkv_bias=qkv_bias, + act_cfg=act_cfg, + norm_cfg=norm_cfg, + batch_first=True, + )) + + self.dec_proj = nn.Linear(in_channels, embed_dims) + + self.cls_emb = nn.Parameter( + torch.randn(1, self.num_classes, embed_dims)) + self.patch_proj = nn.Linear(embed_dims, embed_dims, bias=False) + self.classes_proj = nn.Linear(embed_dims, embed_dims, bias=False) + + self.decoder_norm = build_norm_layer( + norm_cfg, embed_dims, postfix=1)[1] + self.mask_norm = build_norm_layer( + norm_cfg, self.num_classes, postfix=2)[1] + + self.init_std = init_std + + delattr(self, 'conv_seg') + + def init_weights(self): + trunc_normal_(self.cls_emb, std=self.init_std) + trunc_normal_init(self.patch_proj, std=self.init_std) + trunc_normal_init(self.classes_proj, std=self.init_std) + for n, m in self.named_modules(): + if isinstance(m, nn.Linear): + trunc_normal_init(m, std=self.init_std, bias=0) + elif isinstance(m, nn.LayerNorm): + constant_init(m, val=1.0, bias=0.0) + + def forward(self, inputs): + x = self._transform_inputs(inputs) + b, c, h, w = x.shape + x = x.permute(0, 2, 3, 1).contiguous().view(b, -1, c) + + x = self.dec_proj(x) + cls_emb = self.cls_emb.expand(x.size(0), -1, -1) + x = torch.cat((x, cls_emb), 1) + for layer in self.layers: + x = layer(x) + x = self.decoder_norm(x) + + patches = self.patch_proj(x[:, :-self.num_classes]) + cls_seg_feat = self.classes_proj(x[:, -self.num_classes:]) + + patches = F.normalize(patches, dim=2, p=2) + cls_seg_feat = F.normalize(cls_seg_feat, dim=2, p=2) + + masks = patches @ cls_seg_feat.transpose(1, 2) + masks = self.mask_norm(masks) + masks = masks.permute(0, 2, 1).contiguous().view(b, -1, h, w) + + return masks diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/sep_aspp_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/sep_aspp_head.py new file mode 100644 index 0000000..4e894e2 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/sep_aspp_head.py @@ -0,0 +1,102 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import torch +import torch.nn as nn +from mmcv.cnn import ConvModule, DepthwiseSeparableConvModule + +from mmseg.ops import resize +from ..builder import HEADS +from .aspp_head import ASPPHead, ASPPModule + + +class DepthwiseSeparableASPPModule(ASPPModule): + """Atrous Spatial Pyramid Pooling (ASPP) Module with depthwise separable + conv.""" + + def __init__(self, **kwargs): + super(DepthwiseSeparableASPPModule, self).__init__(**kwargs) + for i, dilation in enumerate(self.dilations): + if dilation > 1: + self[i] = DepthwiseSeparableConvModule( + self.in_channels, + self.channels, + 3, + dilation=dilation, + padding=dilation, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + +@HEADS.register_module() +class DepthwiseSeparableASPPHead(ASPPHead): + """Encoder-Decoder with Atrous Separable Convolution for Semantic Image + Segmentation. + + This head is the implementation of `DeepLabV3+ + `_. + + Args: + c1_in_channels (int): The input channels of c1 decoder. If is 0, + the no decoder will be used. + c1_channels (int): The intermediate channels of c1 decoder. + """ + + def __init__(self, c1_in_channels, c1_channels, **kwargs): + super(DepthwiseSeparableASPPHead, self).__init__(**kwargs) + assert c1_in_channels >= 0 + self.aspp_modules = DepthwiseSeparableASPPModule( + dilations=self.dilations, + in_channels=self.in_channels, + channels=self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + if c1_in_channels > 0: + self.c1_bottleneck = ConvModule( + c1_in_channels, + c1_channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + else: + self.c1_bottleneck = None + self.sep_bottleneck = nn.Sequential( + DepthwiseSeparableConvModule( + self.channels + c1_channels, + self.channels, + 3, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + DepthwiseSeparableConvModule( + self.channels, + self.channels, + 3, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + + def forward(self, inputs): + """Forward function.""" + x = self._transform_inputs(inputs) + aspp_outs = [ + resize( + self.image_pool(x), + size=x.size()[2:], + mode='bilinear', + align_corners=self.align_corners) + ] + aspp_outs.extend(self.aspp_modules(x)) + aspp_outs = torch.cat(aspp_outs, dim=1) + output = self.bottleneck(aspp_outs) + if self.c1_bottleneck is not None: + c1_output = self.c1_bottleneck(inputs[0]) + output = resize( + input=output, + size=c1_output.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + output = torch.cat([output, c1_output], dim=1) + output = self.sep_bottleneck(output) + output = self.cls_seg(output) + return output diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/sep_fcn_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/sep_fcn_head.py new file mode 100644 index 0000000..7f9658e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/sep_fcn_head.py @@ -0,0 +1,60 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from mmcv.cnn import DepthwiseSeparableConvModule + +from ..builder import HEADS +from .fcn_head import FCNHead + + +@HEADS.register_module() +class DepthwiseSeparableFCNHead(FCNHead): + """Depthwise-Separable Fully Convolutional Network for Semantic + Segmentation. + + This head is implemented according to `Fast-SCNN: Fast Semantic + Segmentation Network `_. + + Args: + in_channels(int): Number of output channels of FFM. 
+        channels(int): Number of middle-stage channels in the decode head.
+        concat_input(bool): Whether to concatenate original decode input into
+            the result of several consecutive convolution layers.
+            Default: True.
+        num_classes(int): Used to determine the dimension of
+            final prediction tensor.
+        in_index(int): Correspond with 'out_indices' in FastSCNN backbone.
+        norm_cfg (dict | None): Config of norm layers.
+        align_corners (bool): align_corners argument of F.interpolate.
+            Default: False.
+        loss_decode(dict): Config of loss type and some
+            relevant additional options.
+        dw_act_cfg (dict): Activation config of depthwise ConvModule. If it is
+            'default', it will be the same as `act_cfg`. Default: None.
+    """
+
+    def __init__(self, dw_act_cfg=None, **kwargs):
+        super(DepthwiseSeparableFCNHead, self).__init__(**kwargs)
+        self.convs[0] = DepthwiseSeparableConvModule(
+            self.in_channels,
+            self.channels,
+            kernel_size=self.kernel_size,
+            padding=self.kernel_size // 2,
+            norm_cfg=self.norm_cfg,
+            dw_act_cfg=dw_act_cfg)
+
+        for i in range(1, self.num_convs):
+            self.convs[i] = DepthwiseSeparableConvModule(
+                self.channels,
+                self.channels,
+                kernel_size=self.kernel_size,
+                padding=self.kernel_size // 2,
+                norm_cfg=self.norm_cfg,
+                dw_act_cfg=dw_act_cfg)
+
+        if self.concat_input:
+            self.conv_cat = DepthwiseSeparableConvModule(
+                self.in_channels + self.channels,
+                self.channels,
+                kernel_size=self.kernel_size,
+                padding=self.kernel_size // 2,
+                norm_cfg=self.norm_cfg,
+                dw_act_cfg=dw_act_cfg)
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/setr_mla_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/setr_mla_head.py
new file mode 100644
index 0000000..6bb94ae
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/setr_mla_head.py
@@ -0,0 +1,63 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+from mmcv.cnn import ConvModule
+
+from mmseg.ops import Upsample
+from ..builder import HEADS
+from .decode_head import BaseDecodeHead
+
+
+@HEADS.register_module()
+class SETRMLAHead(BaseDecodeHead):
+    """Multi-level feature aggregation head of SETR.
+
+    MLA head of `SETR <https://arxiv.org/abs/2012.15840>`_.
+
+    Args:
+        mla_channels (int): Channels of conv-conv-4x of multi-level feature
+            aggregation. Default: 128.
+        up_scale (int): The scale factor of interpolate. Default: 4.
+ """ + + def __init__(self, mla_channels=128, up_scale=4, **kwargs): + super(SETRMLAHead, self).__init__( + input_transform='multiple_select', **kwargs) + self.mla_channels = mla_channels + + num_inputs = len(self.in_channels) + + # Refer to self.cls_seg settings of BaseDecodeHead + assert self.channels == num_inputs * mla_channels + + self.up_convs = nn.ModuleList() + for i in range(num_inputs): + self.up_convs.append( + nn.Sequential( + ConvModule( + in_channels=self.in_channels[i], + out_channels=mla_channels, + kernel_size=3, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + ConvModule( + in_channels=mla_channels, + out_channels=mla_channels, + kernel_size=3, + padding=1, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + Upsample( + scale_factor=up_scale, + mode='bilinear', + align_corners=self.align_corners))) + + def forward(self, inputs): + inputs = self._transform_inputs(inputs) + outs = [] + for x, up_conv in zip(inputs, self.up_convs): + outs.append(up_conv(x)) + out = torch.cat(outs, dim=1) + out = self.cls_seg(out) + return out diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/setr_up_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/setr_up_head.py new file mode 100644 index 0000000..87e7ea7 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/setr_up_head.py @@ -0,0 +1,81 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn +from mmcv.cnn import ConvModule, build_norm_layer + +from mmseg.ops import Upsample +from ..builder import HEADS +from .decode_head import BaseDecodeHead + + +@HEADS.register_module() +class SETRUPHead(BaseDecodeHead): + """Naive upsampling head and Progressive upsampling head of SETR. + + Naive or PUP head of `SETR `_. + + Args: + norm_layer (dict): Config dict for input normalization. + Default: norm_layer=dict(type='LN', eps=1e-6, requires_grad=True). + num_convs (int): Number of decoder convolutions. Default: 1. + up_scale (int): The scale factor of interpolate. Default:4. + kernel_size (int): The kernel size of convolution when decoding + feature information from backbone. Default: 3. + init_cfg (dict | list[dict] | None): Initialization config dict. + Default: dict( + type='Constant', val=1.0, bias=0, layer='LayerNorm'). + """ + + def __init__(self, + norm_layer=dict(type='LN', eps=1e-6, requires_grad=True), + num_convs=1, + up_scale=4, + kernel_size=3, + init_cfg=[ + dict(type='Constant', val=1.0, bias=0, layer='LayerNorm'), + dict( + type='Normal', + std=0.01, + override=dict(name='conv_seg')) + ], + **kwargs): + + assert kernel_size in [1, 3], 'kernel_size must be 1 or 3.' 
+ + super(SETRUPHead, self).__init__(init_cfg=init_cfg, **kwargs) + + assert isinstance(self.in_channels, int) + + _, self.norm = build_norm_layer(norm_layer, self.in_channels) + + self.up_convs = nn.ModuleList() + in_channels = self.in_channels + out_channels = self.channels + for _ in range(num_convs): + self.up_convs.append( + nn.Sequential( + ConvModule( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=1, + padding=int(kernel_size - 1) // 2, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg), + Upsample( + scale_factor=up_scale, + mode='bilinear', + align_corners=self.align_corners))) + in_channels = out_channels + + def forward(self, x): + x = self._transform_inputs(x) + + n, c, h, w = x.shape + x = x.reshape(n, c, h * w).transpose(2, 1).contiguous() + x = self.norm(x) + x = x.transpose(1, 2).reshape(n, c, h, w).contiguous() + + for up_conv in self.up_convs: + x = up_conv(x) + out = self.cls_seg(x) + return out diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/stdc_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/stdc_head.py new file mode 100644 index 0000000..bddf1eb --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/stdc_head.py @@ -0,0 +1,85 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn.functional as F + +from ..builder import HEADS +from .fcn_head import FCNHead + + +@HEADS.register_module() +class STDCHead(FCNHead): + """This head is the implementation of `Rethinking BiSeNet For Real-time + Semantic Segmentation `_. + + Args: + boundary_threshold (float): The threshold of calculating boundary. + Default: 0.1. + """ + + def __init__(self, boundary_threshold=0.1, **kwargs): + super(STDCHead, self).__init__(**kwargs) + self.boundary_threshold = boundary_threshold + # Using register buffer to make laplacian kernel on the same + # device of `seg_label`. + self.register_buffer( + 'laplacian_kernel', + torch.tensor([-1, -1, -1, -1, 8, -1, -1, -1, -1], + dtype=torch.float32, + requires_grad=False).reshape((1, 1, 3, 3))) + self.fusion_kernel = torch.nn.Parameter( + torch.tensor([[6. / 10], [3. / 10], [1. / 10]], + dtype=torch.float32).reshape(1, 3, 1, 1), + requires_grad=False) + + def losses(self, seg_logit, seg_label): + """Compute Detail Aggregation Loss.""" + # Note: The paper claims `fusion_kernel` is a trainable 1x1 conv + # parameters. However, it is a constant in original repo and other + # codebase because it would not be added into computation graph + # after threshold operation. 
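        # Note (illustrative annotation): sketch of what follows. The fixed
        # 3x3 Laplacian kernel sums to zero over flat label regions and
        # responds only where neighbouring labels differ, so
        # conv2d(seg_label, laplacian_kernel) is near zero inside objects
        # and large at class boundaries. Thresholding at
        # `boundary_threshold` (0.1) binarizes this into a boundary map;
        # repeating at stride 2 and 4 and fusing the three maps with the
        # constant 6:3:1 `fusion_kernel` yields the multi-scale boundary
        # target that supervises this detail head.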
+ seg_label = seg_label.to(self.laplacian_kernel) + boundary_targets = F.conv2d( + seg_label, self.laplacian_kernel, padding=1) + boundary_targets = boundary_targets.clamp(min=0) + boundary_targets[boundary_targets > self.boundary_threshold] = 1 + boundary_targets[boundary_targets <= self.boundary_threshold] = 0 + + boundary_targets_x2 = F.conv2d( + seg_label, self.laplacian_kernel, stride=2, padding=1) + boundary_targets_x2 = boundary_targets_x2.clamp(min=0) + + boundary_targets_x4 = F.conv2d( + seg_label, self.laplacian_kernel, stride=4, padding=1) + boundary_targets_x4 = boundary_targets_x4.clamp(min=0) + + boundary_targets_x4_up = F.interpolate( + boundary_targets_x4, boundary_targets.shape[2:], mode='nearest') + boundary_targets_x2_up = F.interpolate( + boundary_targets_x2, boundary_targets.shape[2:], mode='nearest') + + boundary_targets_x2_up[ + boundary_targets_x2_up > self.boundary_threshold] = 1 + boundary_targets_x2_up[ + boundary_targets_x2_up <= self.boundary_threshold] = 0 + + boundary_targets_x4_up[ + boundary_targets_x4_up > self.boundary_threshold] = 1 + boundary_targets_x4_up[ + boundary_targets_x4_up <= self.boundary_threshold] = 0 + + boundary_targets_pyramids = torch.stack( + (boundary_targets, boundary_targets_x2_up, boundary_targets_x4_up), + dim=1) + + boundary_targets_pyramids = boundary_targets_pyramids.squeeze(2) + boundary_targets_pyramid = F.conv2d(boundary_targets_pyramids, + self.fusion_kernel) + + boundary_targets_pyramid[ + boundary_targets_pyramid > self.boundary_threshold] = 1 + boundary_targets_pyramid[ + boundary_targets_pyramid <= self.boundary_threshold] = 0 + + loss = super(STDCHead, self).losses(seg_logit, + boundary_targets_pyramid.long()) + return loss diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/uper_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/uper_head.py new file mode 100644 index 0000000..06b152a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/decode_heads/uper_head.py @@ -0,0 +1,140 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule + +from mmseg.ops import resize +from ..builder import HEADS +from .decode_head import BaseDecodeHead +from .psp_head import PPM + + +@HEADS.register_module() +class UPerHead(BaseDecodeHead): + """Unified Perceptual Parsing for Scene Understanding. + + This head is the implementation of `UPerNet + `_. + + Args: + pool_scales (tuple[int]): Pooling scales used in Pooling Pyramid + Module applied on the last feature. Default: (1, 2, 3, 6). 
+ """ + + def __init__(self, pool_scales=(1, 2, 3, 6), **kwargs): + super(UPerHead, self).__init__( + input_transform='multiple_select', **kwargs) + # PSP Module + self.psp_modules = PPM( + pool_scales, + self.in_channels[-1], + self.channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=self.align_corners) + self.bottleneck = ConvModule( + self.in_channels[-1] + len(pool_scales) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + # FPN Module + self.lateral_convs = nn.ModuleList() + self.fpn_convs = nn.ModuleList() + for in_channels in self.in_channels[:-1]: # skip the top layer + l_conv = ConvModule( + in_channels, + self.channels, + 1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + inplace=False) + fpn_conv = ConvModule( + self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + inplace=False) + self.lateral_convs.append(l_conv) + self.fpn_convs.append(fpn_conv) + + self.fpn_bottleneck = ConvModule( + len(self.in_channels) * self.channels, + self.channels, + 3, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg) + + def psp_forward(self, inputs): + """Forward function of PSP module.""" + x = inputs[-1] + psp_outs = [x] + psp_outs.extend(self.psp_modules(x)) + psp_outs = torch.cat(psp_outs, dim=1) + output = self.bottleneck(psp_outs) + + return output + + def _forward_feature(self, inputs): + """Forward function for feature maps before classifying each pixel with + ``self.cls_seg`` fc. + + Args: + inputs (list[Tensor]): List of multi-level img features. + + Returns: + feats (Tensor): A tensor of shape (batch_size, self.channels, + H, W) which is feature map for last layer of decoder head. + """ + inputs = self._transform_inputs(inputs) + + # build laterals + laterals = [ + lateral_conv(inputs[i]) + for i, lateral_conv in enumerate(self.lateral_convs) + ] + + laterals.append(self.psp_forward(inputs)) + + # build top-down path + used_backbone_levels = len(laterals) + for i in range(used_backbone_levels - 1, 0, -1): + prev_shape = laterals[i - 1].shape[2:] + laterals[i - 1] = laterals[i - 1] + resize( + laterals[i], + size=prev_shape, + mode='bilinear', + align_corners=self.align_corners) + + # build outputs + fpn_outs = [ + self.fpn_convs[i](laterals[i]) + for i in range(used_backbone_levels - 1) + ] + # append psp feature + fpn_outs.append(laterals[-1]) + + for i in range(used_backbone_levels - 1, 0, -1): + fpn_outs[i] = resize( + fpn_outs[i], + size=fpn_outs[0].shape[2:], + mode='bilinear', + align_corners=self.align_corners) + fpn_outs = torch.cat(fpn_outs, dim=1) + feats = self.fpn_bottleneck(fpn_outs) + return feats + + def forward(self, inputs): + """Forward function.""" + output = self._forward_feature(inputs) + output = self.cls_seg(output) + return output diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/losses/__init__.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/losses/__init__.py new file mode 100644 index 0000000..fbc5b2d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/losses/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from .accuracy import Accuracy, accuracy +from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy, + cross_entropy, mask_cross_entropy) +from .dice_loss import DiceLoss +from .focal_loss import FocalLoss +from .lovasz_loss import LovaszLoss +from .utils import reduce_loss, weight_reduce_loss, weighted_loss + +__all__ = [ + 'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy', + 'mask_cross_entropy', 'CrossEntropyLoss', 'reduce_loss', + 'weight_reduce_loss', 'weighted_loss', 'LovaszLoss', 'DiceLoss', + 'FocalLoss' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/losses/accuracy.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/losses/accuracy.py new file mode 100644 index 0000000..1d9e2d7 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/losses/accuracy.py @@ -0,0 +1,92 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn + + +def accuracy(pred, target, topk=1, thresh=None, ignore_index=None): + """Calculate accuracy according to the prediction and target. + + Args: + pred (torch.Tensor): The model prediction, shape (N, num_class, ...) + target (torch.Tensor): The target of each prediction, shape (N, , ...) + ignore_index (int | None): The label index to be ignored. Default: None + topk (int | tuple[int], optional): If the predictions in ``topk`` + matches the target, the predictions will be regarded as + correct ones. Defaults to 1. + thresh (float, optional): If not None, predictions with scores under + this threshold are considered incorrect. Default to None. + + Returns: + float | tuple[float]: If the input ``topk`` is a single integer, + the function will return a single float as accuracy. If + ``topk`` is a tuple containing multiple integers, the + function will return a tuple containing accuracies of + each ``topk`` number. + """ + assert isinstance(topk, (int, tuple)) + if isinstance(topk, int): + topk = (topk, ) + return_single = True + else: + return_single = False + + maxk = max(topk) + if pred.size(0) == 0: + accu = [pred.new_tensor(0.) for i in range(len(topk))] + return accu[0] if return_single else accu + assert pred.ndim == target.ndim + 1 + assert pred.size(0) == target.size(0) + assert maxk <= pred.size(1), \ + f'maxk {maxk} exceeds pred dimension {pred.size(1)}' + pred_value, pred_label = pred.topk(maxk, dim=1) + # transpose to shape (maxk, N, ...) + pred_label = pred_label.transpose(0, 1) + correct = pred_label.eq(target.unsqueeze(0).expand_as(pred_label)) + if thresh is not None: + # Only prediction values larger than thresh are counted as correct + correct = correct & (pred_value > thresh).t() + if ignore_index is not None: + correct = correct[:, target != ignore_index] + res = [] + eps = torch.finfo(torch.float32).eps + for k in topk: + # Avoid causing ZeroDivisionError when all pixels + # of an image are ignored + correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) + eps + if ignore_index is not None: + total_num = target[target != ignore_index].numel() + eps + else: + total_num = target.numel() + eps + res.append(correct_k.mul_(100.0 / total_num)) + return res[0] if return_single else res + + +class Accuracy(nn.Module): + """Accuracy calculation module.""" + + def __init__(self, topk=(1, ), thresh=None, ignore_index=None): + """Module to calculate the accuracy. + + Args: + topk (tuple, optional): The criterion used to calculate the + accuracy. Defaults to (1,). 
+ thresh (float, optional): If not None, predictions with scores + under this threshold are considered incorrect. Default to None. + """ + super().__init__() + self.topk = topk + self.thresh = thresh + self.ignore_index = ignore_index + + def forward(self, pred, target): + """Forward function to calculate accuracy. + + Args: + pred (torch.Tensor): Prediction of models. + target (torch.Tensor): Target for each prediction. + + Returns: + tuple[float]: The accuracies under different topk criterions. + """ + return accuracy(pred, target, self.topk, self.thresh, + self.ignore_index) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/losses/cross_entropy_loss.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/losses/cross_entropy_loss.py new file mode 100644 index 0000000..fe7b4a2 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/losses/cross_entropy_loss.py @@ -0,0 +1,296 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..builder import LOSSES +from .utils import get_class_weight, weight_reduce_loss + + +def cross_entropy(pred, + label, + weight=None, + class_weight=None, + reduction='mean', + avg_factor=None, + ignore_index=-100, + avg_non_ignore=False): + """cross_entropy. The wrapper function for :func:`F.cross_entropy` + + Args: + pred (torch.Tensor): The prediction with shape (N, 1). + label (torch.Tensor): The learning label of the prediction. + weight (torch.Tensor, optional): Sample-wise loss weight. + Default: None. + class_weight (list[float], optional): The weight for each class. + Default: None. + reduction (str, optional): The method used to reduce the loss. + Options are 'none', 'mean' and 'sum'. Default: 'mean'. + avg_factor (int, optional): Average factor that is used to average + the loss. Default: None. + ignore_index (int): Specifies a target value that is ignored and + does not contribute to the input gradients. When + ``avg_non_ignore `` is ``True``, and the ``reduction`` is + ``''mean''``, the loss is averaged over non-ignored targets. + Defaults: -100. + avg_non_ignore (bool): The flag decides to whether the loss is + only averaged over non-ignored targets. Default: False. + `New in version 0.23.0.` + """ + + # class_weight is a manual rescaling weight given to each class. 
+ # If given, has to be a Tensor of size C element-wise losses + loss = F.cross_entropy( + pred, + label, + weight=class_weight, + reduction='none', + ignore_index=ignore_index) + + # apply weights and do the reduction + # average loss over non-ignored elements + # pytorch's official cross_entropy average loss over non-ignored elements + # refer to https://github.com/pytorch/pytorch/blob/56b43f4fec1f76953f15a627694d4bba34588969/torch/nn/functional.py#L2660 # noqa + if (avg_factor is None) and avg_non_ignore and reduction == 'mean': + avg_factor = label.numel() - (label == ignore_index).sum().item() + if weight is not None: + weight = weight.float() + loss = weight_reduce_loss( + loss, weight=weight, reduction=reduction, avg_factor=avg_factor) + + return loss + + +def _expand_onehot_labels(labels, label_weights, target_shape, ignore_index): + """Expand onehot labels to match the size of prediction.""" + bin_labels = labels.new_zeros(target_shape) + valid_mask = (labels >= 0) & (labels != ignore_index) + inds = torch.nonzero(valid_mask, as_tuple=True) + + if inds[0].numel() > 0: + if labels.dim() == 3: + bin_labels[inds[0], labels[valid_mask], inds[1], inds[2]] = 1 + else: + bin_labels[inds[0], labels[valid_mask]] = 1 + + valid_mask = valid_mask.unsqueeze(1).expand(target_shape).float() + + if label_weights is None: + bin_label_weights = valid_mask + else: + bin_label_weights = label_weights.unsqueeze(1).expand(target_shape) + bin_label_weights = bin_label_weights * valid_mask + + return bin_labels, bin_label_weights, valid_mask + + +def binary_cross_entropy(pred, + label, + weight=None, + reduction='mean', + avg_factor=None, + class_weight=None, + ignore_index=-100, + avg_non_ignore=False, + **kwargs): + """Calculate the binary CrossEntropy loss. + + Args: + pred (torch.Tensor): The prediction with shape (N, 1). + label (torch.Tensor): The learning label of the prediction. + Note: In bce loss, label < 0 is invalid. + weight (torch.Tensor, optional): Sample-wise loss weight. + reduction (str, optional): The method used to reduce the loss. + Options are "none", "mean" and "sum". + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. + class_weight (list[float], optional): The weight for each class. + ignore_index (int): The label index to be ignored. Default: -100. + avg_non_ignore (bool): The flag decides to whether the loss is + only averaged over non-ignored targets. Default: False. + `New in version 0.23.0.` + + Returns: + torch.Tensor: The calculated loss + """ + if pred.size(1) == 1: + # For binary class segmentation, the shape of pred is + # [N, 1, H, W] and that of label is [N, H, W]. 
+ # As the ignore_index often set as 255, so the + # binary class label check should mask out + # ignore_index + assert label[label != ignore_index].max() <= 1, \ + 'For pred with shape [N, 1, H, W], its label must have at ' \ + 'most 2 classes' + pred = pred.squeeze(1) + if pred.dim() != label.dim(): + assert (pred.dim() == 2 and label.dim() == 1) or ( + pred.dim() == 4 and label.dim() == 3), \ + 'Only pred shape [N, C], label shape [N] or pred shape [N, C, ' \ + 'H, W], label shape [N, H, W] are supported' + # `weight` returned from `_expand_onehot_labels` + # has been treated for valid (non-ignore) pixels + label, weight, valid_mask = _expand_onehot_labels( + label, weight, pred.shape, ignore_index) + else: + # should mask out the ignored elements + valid_mask = ((label >= 0) & (label != ignore_index)).float() + if weight is not None: + weight = weight * valid_mask + else: + weight = valid_mask + # average loss over non-ignored and valid elements + if reduction == 'mean' and avg_factor is None and avg_non_ignore: + avg_factor = valid_mask.sum().item() + + loss = F.binary_cross_entropy_with_logits( + pred, label.float(), pos_weight=class_weight, reduction='none') + # do the reduction for the weighted loss + loss = weight_reduce_loss( + loss, weight, reduction=reduction, avg_factor=avg_factor) + + return loss + + +def mask_cross_entropy(pred, + target, + label, + reduction='mean', + avg_factor=None, + class_weight=None, + ignore_index=None, + **kwargs): + """Calculate the CrossEntropy loss for masks. + + Args: + pred (torch.Tensor): The prediction with shape (N, C), C is the number + of classes. + target (torch.Tensor): The learning label of the prediction. + label (torch.Tensor): ``label`` indicates the class label of the mask' + corresponding object. This will be used to select the mask in the + of the class which the object belongs to when the mask prediction + if not class-agnostic. + reduction (str, optional): The method used to reduce the loss. + Options are "none", "mean" and "sum". + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. + class_weight (list[float], optional): The weight for each class. + ignore_index (None): Placeholder, to be consistent with other loss. + Default: None. + + Returns: + torch.Tensor: The calculated loss + """ + assert ignore_index is None, 'BCE loss does not support ignore_index' + # TODO: handle these two reserved arguments + assert reduction == 'mean' and avg_factor is None + num_rois = pred.size()[0] + inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device) + pred_slice = pred[inds, label].squeeze(1) + return F.binary_cross_entropy_with_logits( + pred_slice, target, weight=class_weight, reduction='mean')[None] + + +@LOSSES.register_module() +class CrossEntropyLoss(nn.Module): + """CrossEntropyLoss. + + Args: + use_sigmoid (bool, optional): Whether the prediction uses sigmoid + of softmax. Defaults to False. + use_mask (bool, optional): Whether to use mask cross entropy loss. + Defaults to False. + reduction (str, optional): . Defaults to 'mean'. + Options are "none", "mean" and "sum". + class_weight (list[float] | str, optional): Weight of each class. If in + str format, read them from a file. Defaults to None. + loss_weight (float, optional): Weight of the loss. Defaults to 1.0. + loss_name (str, optional): Name of the loss item. If you want this loss + item to be included into the backward graph, `loss_` must be the + prefix of the name. Defaults to 'loss_ce'. 
+ avg_non_ignore (bool): The flag decides to whether the loss is + only averaged over non-ignored targets. Default: False. + `New in version 0.23.0.` + """ + + def __init__(self, + use_sigmoid=False, + use_mask=False, + reduction='mean', + class_weight=None, + loss_weight=1.0, + loss_name='loss_ce', + avg_non_ignore=False): + super(CrossEntropyLoss, self).__init__() + assert (use_sigmoid is False) or (use_mask is False) + self.use_sigmoid = use_sigmoid + self.use_mask = use_mask + self.reduction = reduction + self.loss_weight = loss_weight + self.class_weight = get_class_weight(class_weight) + self.avg_non_ignore = avg_non_ignore + if not self.avg_non_ignore and self.reduction == 'mean': + warnings.warn( + 'Default ``avg_non_ignore`` is False, if you would like to ' + 'ignore the certain label and average loss over non-ignore ' + 'labels, which is the same with PyTorch official ' + 'cross_entropy, set ``avg_non_ignore=True``.') + + if self.use_sigmoid: + self.cls_criterion = binary_cross_entropy + elif self.use_mask: + self.cls_criterion = mask_cross_entropy + else: + self.cls_criterion = cross_entropy + self._loss_name = loss_name + + def extra_repr(self): + """Extra repr.""" + s = f'avg_non_ignore={self.avg_non_ignore}' + return s + + def forward(self, + cls_score, + label, + weight=None, + avg_factor=None, + reduction_override=None, + ignore_index=-100, + **kwargs): + """Forward function.""" + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + if self.class_weight is not None: + class_weight = cls_score.new_tensor(self.class_weight) + else: + class_weight = None + # Note: for BCE loss, label < 0 is invalid. + loss_cls = self.loss_weight * self.cls_criterion( + cls_score, + label, + weight, + class_weight=class_weight, + reduction=reduction, + avg_factor=avg_factor, + avg_non_ignore=self.avg_non_ignore, + ignore_index=ignore_index, + **kwargs) + return loss_cls + + @property + def loss_name(self): + """Loss Name. + + This function must be implemented and will return the name of this + loss function. This name will be used to combine different loss items + by simple sum operation. In addition, if you want this loss item to be + included into the backward graph, `loss_` must be the prefix of the + name. + + Returns: + str: The name of this loss item. + """ + return self._loss_name diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/losses/dice_loss.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/losses/dice_loss.py new file mode 100644 index 0000000..a294bc2 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/losses/dice_loss.py @@ -0,0 +1,137 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
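# Note (illustrative usage sketch, not part of this patch): driving the
# CrossEntropyLoss module above. Shapes follow its docstrings; the import
# path assumes the mmseg package layout added by this patch is installed:
#
import torch
from mmseg.models.losses import CrossEntropyLoss

criterion = CrossEntropyLoss(avg_non_ignore=True)   # average over non-ignored pixels
logits = torch.randn(2, 4, 8, 8)                    # (N, num_classes, H, W)
labels = torch.randint(0, 4, (2, 8, 8))             # (N, H, W) class indices
labels[0, :2, :2] = 255                             # mark some pixels as ignored
loss = criterion(logits, labels, ignore_index=255)  # scalar tensor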
+"""Modified from https://github.com/LikeLy-Journey/SegmenTron/blob/master/ +segmentron/solver/loss.py (Apache-2.0 License)""" +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..builder import LOSSES +from .utils import get_class_weight, weighted_loss + + +@weighted_loss +def dice_loss(pred, + target, + valid_mask, + smooth=1, + exponent=2, + class_weight=None, + ignore_index=255): + assert pred.shape[0] == target.shape[0] + total_loss = 0 + num_classes = pred.shape[1] + for i in range(num_classes): + if i != ignore_index: + dice_loss = binary_dice_loss( + pred[:, i], + target[..., i], + valid_mask=valid_mask, + smooth=smooth, + exponent=exponent) + if class_weight is not None: + dice_loss *= class_weight[i] + total_loss += dice_loss + return total_loss / num_classes + + +@weighted_loss +def binary_dice_loss(pred, target, valid_mask, smooth=1, exponent=2, **kwargs): + assert pred.shape[0] == target.shape[0] + pred = pred.reshape(pred.shape[0], -1) + target = target.reshape(target.shape[0], -1) + valid_mask = valid_mask.reshape(valid_mask.shape[0], -1) + + num = torch.sum(torch.mul(pred, target) * valid_mask, dim=1) * 2 + smooth + den = torch.sum(pred.pow(exponent) + target.pow(exponent), dim=1) + smooth + + return 1 - num / den + + +@LOSSES.register_module() +class DiceLoss(nn.Module): + """DiceLoss. + + This loss is proposed in `V-Net: Fully Convolutional Neural Networks for + Volumetric Medical Image Segmentation `_. + + Args: + smooth (float): A float number to smooth loss, and avoid NaN error. + Default: 1 + exponent (float): An float number to calculate denominator + value: \\sum{x^exponent} + \\sum{y^exponent}. Default: 2. + reduction (str, optional): The method used to reduce the loss. Options + are "none", "mean" and "sum". This parameter only works when + per_image is True. Default: 'mean'. + class_weight (list[float] | str, optional): Weight of each class. If in + str format, read them from a file. Defaults to None. + loss_weight (float, optional): Weight of the loss. Default to 1.0. + ignore_index (int | None): The label index to be ignored. Default: 255. + loss_name (str, optional): Name of the loss item. If you want this loss + item to be included into the backward graph, `loss_` must be the + prefix of the name. Defaults to 'loss_dice'. 
+ """ + + def __init__(self, + smooth=1, + exponent=2, + reduction='mean', + class_weight=None, + loss_weight=1.0, + ignore_index=255, + loss_name='loss_dice', + **kwargs): + super(DiceLoss, self).__init__() + self.smooth = smooth + self.exponent = exponent + self.reduction = reduction + self.class_weight = get_class_weight(class_weight) + self.loss_weight = loss_weight + self.ignore_index = ignore_index + self._loss_name = loss_name + + def forward(self, + pred, + target, + avg_factor=None, + reduction_override=None, + **kwargs): + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + if self.class_weight is not None: + class_weight = pred.new_tensor(self.class_weight) + else: + class_weight = None + + pred = F.softmax(pred, dim=1) + num_classes = pred.shape[1] + one_hot_target = F.one_hot( + torch.clamp(target.long(), 0, num_classes - 1), + num_classes=num_classes) + valid_mask = (target != self.ignore_index).long() + + loss = self.loss_weight * dice_loss( + pred, + one_hot_target, + valid_mask=valid_mask, + reduction=reduction, + avg_factor=avg_factor, + smooth=self.smooth, + exponent=self.exponent, + class_weight=class_weight, + ignore_index=self.ignore_index) + return loss + + @property + def loss_name(self): + """Loss Name. + + This function must be implemented and will return the name of this + loss function. This name will be used to combine different loss items + by simple sum operation. In addition, if you want this loss item to be + included into the backward graph, `loss_` must be the prefix of the + name. + Returns: + str: The name of this loss item. + """ + return self._loss_name diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/losses/focal_loss.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/losses/focal_loss.py new file mode 100644 index 0000000..af1c711 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/losses/focal_loss.py @@ -0,0 +1,327 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# Modified from https://github.com/open-mmlab/mmdetection +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.ops import sigmoid_focal_loss as _sigmoid_focal_loss + +from ..builder import LOSSES +from .utils import weight_reduce_loss + + +# This method is used when cuda is not available +def py_sigmoid_focal_loss(pred, + target, + one_hot_target=None, + weight=None, + gamma=2.0, + alpha=0.5, + class_weight=None, + valid_mask=None, + reduction='mean', + avg_factor=None): + """PyTorch version of `Focal Loss `_. + + Args: + pred (torch.Tensor): The prediction with shape (N, C), C is the + number of classes + target (torch.Tensor): The learning label of the prediction with + shape (N, C) + one_hot_target (None): Placeholder. It should be None. + weight (torch.Tensor, optional): Sample-wise loss weight. + gamma (float, optional): The gamma for calculating the modulating + factor. Defaults to 2.0. + alpha (float | list[float], optional): A balanced form for Focal Loss. + Defaults to 0.5. + class_weight (list[float], optional): Weight of each class. + Defaults to None. + valid_mask (torch.Tensor, optional): A mask uses 1 to mark the valid + samples and uses 0 to mark the ignored samples. Default: None. + reduction (str, optional): The method used to reduce the loss into + a scalar. Defaults to 'mean'. + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. 
+ """ + if isinstance(alpha, list): + alpha = pred.new_tensor(alpha) + pred_sigmoid = pred.sigmoid() + target = target.type_as(pred) + one_minus_pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target) + focal_weight = (alpha * target + (1 - alpha) * + (1 - target)) * one_minus_pt.pow(gamma) + + loss = F.binary_cross_entropy_with_logits( + pred, target, reduction='none') * focal_weight + final_weight = torch.ones(1, pred.size(1)).type_as(loss) + if weight is not None: + if weight.shape != loss.shape and weight.size(0) == loss.size(0): + # For most cases, weight is of shape (N, ), + # which means it does not have the second axis num_class + weight = weight.view(-1, 1) + assert weight.dim() == loss.dim() + final_weight = final_weight * weight + if class_weight is not None: + final_weight = final_weight * pred.new_tensor(class_weight) + if valid_mask is not None: + final_weight = final_weight * valid_mask + loss = weight_reduce_loss(loss, final_weight, reduction, avg_factor) + return loss + + +def sigmoid_focal_loss(pred, + target, + one_hot_target, + weight=None, + gamma=2.0, + alpha=0.5, + class_weight=None, + valid_mask=None, + reduction='mean', + avg_factor=None): + r"""A warpper of cuda version `Focal Loss + `_. + Args: + pred (torch.Tensor): The prediction with shape (N, C), C is the number + of classes. + target (torch.Tensor): The learning label of the prediction. It's shape + should be (N, ) + one_hot_target (torch.Tensor): The learning label with shape (N, C) + weight (torch.Tensor, optional): Sample-wise loss weight. + gamma (float, optional): The gamma for calculating the modulating + factor. Defaults to 2.0. + alpha (float | list[float], optional): A balanced form for Focal Loss. + Defaults to 0.5. + class_weight (list[float], optional): Weight of each class. + Defaults to None. + valid_mask (torch.Tensor, optional): A mask uses 1 to mark the valid + samples and uses 0 to mark the ignored samples. Default: None. + reduction (str, optional): The method used to reduce the loss into + a scalar. Defaults to 'mean'. Options are "none", "mean" and "sum". + avg_factor (int, optional): Average factor that is used to average + the loss. Defaults to None. + """ + # Function.apply does not accept keyword arguments, so the decorator + # "weighted_loss" is not applicable + final_weight = torch.ones(1, pred.size(1)).type_as(pred) + if isinstance(alpha, list): + # _sigmoid_focal_loss doesn't accept alpha of list type. Therefore, if + # a list is given, we set the input alpha as 0.5. This means setting + # equal weight for foreground class and background class. By + # multiplying the loss by 2, the effect of setting alpha as 0.5 is + # undone. The alpha of type list is used to regulate the loss in the + # post-processing process. 
+ loss = _sigmoid_focal_loss(pred.contiguous(), target.contiguous(), + gamma, 0.5, None, 'none') * 2 + alpha = pred.new_tensor(alpha) + final_weight = final_weight * ( + alpha * one_hot_target + (1 - alpha) * (1 - one_hot_target)) + else: + loss = _sigmoid_focal_loss(pred.contiguous(), target.contiguous(), + gamma, alpha, None, 'none') + if weight is not None: + if weight.shape != loss.shape and weight.size(0) == loss.size(0): + # For most cases, weight is of shape (N, ), + # which means it does not have the second axis num_class + weight = weight.view(-1, 1) + assert weight.dim() == loss.dim() + final_weight = final_weight * weight + if class_weight is not None: + final_weight = final_weight * pred.new_tensor(class_weight) + if valid_mask is not None: + final_weight = final_weight * valid_mask + loss = weight_reduce_loss(loss, final_weight, reduction, avg_factor) + return loss + + +@LOSSES.register_module() +class FocalLoss(nn.Module): + + def __init__(self, + use_sigmoid=True, + gamma=2.0, + alpha=0.5, + reduction='mean', + class_weight=None, + loss_weight=1.0, + loss_name='loss_focal'): + """`Focal Loss `_ + Args: + use_sigmoid (bool, optional): Whether to the prediction is + used for sigmoid or softmax. Defaults to True. + gamma (float, optional): The gamma for calculating the modulating + factor. Defaults to 2.0. + alpha (float | list[float], optional): A balanced form for Focal + Loss. Defaults to 0.5. When a list is provided, the length + of the list should be equal to the number of classes. + Please be careful that this parameter is not the + class-wise weight but the weight of a binary classification + problem. This binary classification problem regards the + pixels which belong to one class as the foreground + and the other pixels as the background, each element in + the list is the weight of the corresponding foreground class. + The value of alpha or each element of alpha should be a float + in the interval [0, 1]. If you want to specify the class-wise + weight, please use `class_weight` parameter. + reduction (str, optional): The method used to reduce the loss into + a scalar. Defaults to 'mean'. Options are "none", "mean" and + "sum". + class_weight (list[float], optional): Weight of each class. + Defaults to None. + loss_weight (float, optional): Weight of loss. Defaults to 1.0. + loss_name (str, optional): Name of the loss item. If you want this + loss item to be included into the backward graph, `loss_` must + be the prefix of the name. Defaults to 'loss_focal'. + """ + super(FocalLoss, self).__init__() + assert use_sigmoid is True, \ + 'AssertionError: Only sigmoid focal loss supported now.' 
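        # Note (illustrative annotation): the binary focal loss per pixel is
        #   FL(p_t) = -alpha_t * (1 - p_t)**gamma * log(p_t),
        # where p_t is the predicted probability of the true class. With the
        # default gamma=2.0, a well-classified pixel with p_t = 0.9 keeps
        # (1 - 0.9)**2 = 0.01 of its cross-entropy, while a hard pixel with
        # p_t = 0.1 keeps (1 - 0.1)**2 = 0.81 of it, roughly an 80x
        # re-weighting toward hard examples.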
+ assert reduction in ('none', 'mean', 'sum'), \ + "AssertionError: reduction should be 'none', 'mean' or " \ + "'sum'" + assert isinstance(alpha, (float, list)), \ + 'AssertionError: alpha should be of type float or list' + assert isinstance(gamma, float), \ + 'AssertionError: gamma should be of type float' + assert isinstance(loss_weight, float), \ + 'AssertionError: loss_weight should be of type float' + assert isinstance(loss_name, str), \ + 'AssertionError: loss_name should be of type str' + assert isinstance(class_weight, list) or class_weight is None, \ + 'AssertionError: class_weight must be None or of type list' + self.use_sigmoid = use_sigmoid + self.gamma = gamma + self.alpha = alpha + self.reduction = reduction + self.class_weight = class_weight + self.loss_weight = loss_weight + self._loss_name = loss_name + + def forward(self, + pred, + target, + weight=None, + avg_factor=None, + reduction_override=None, + ignore_index=255, + **kwargs): + """Forward function. + + Args: + pred (torch.Tensor): The prediction with shape + (N, C) where C = number of classes, or + (N, C, d_1, d_2, ..., d_K) with K≥1 in the + case of K-dimensional loss. + target (torch.Tensor): The ground truth. If containing class + indices, shape (N) where each value is 0≤targets[i]≤C−1, + or (N, d_1, d_2, ..., d_K) with K≥1 in the case of + K-dimensional loss. If containing class probabilities, + same shape as the input. + weight (torch.Tensor, optional): The weight of loss for each + prediction. Defaults to None. + avg_factor (int, optional): Average factor that is used to + average the loss. Defaults to None. + reduction_override (str, optional): The reduction method used + to override the original reduction method of the loss. + Options are "none", "mean" and "sum". + ignore_index (int, optional): The label index to be ignored. + Default: 255 + Returns: + torch.Tensor: The calculated loss + """ + assert isinstance(ignore_index, int), \ + 'ignore_index must be of type int' + assert reduction_override in (None, 'none', 'mean', 'sum'), \ + "AssertionError: reduction should be 'none', 'mean' or " \ + "'sum'" + assert pred.shape == target.shape or \ + (pred.size(0) == target.size(0) and + pred.shape[2:] == target.shape[1:]), \ + "The shape of pred doesn't match the shape of target" + + original_shape = pred.shape + + # [B, C, d_1, d_2, ..., d_k] -> [C, B, d_1, d_2, ..., d_k] + pred = pred.transpose(0, 1) + # [C, B, d_1, d_2, ..., d_k] -> [C, N] + pred = pred.reshape(pred.size(0), -1) + # [C, N] -> [N, C] + pred = pred.transpose(0, 1).contiguous() + + if original_shape == target.shape: + # target with shape [B, C, d_1, d_2, ...] + # transform its shape into [N, C] + # [B, C, d_1, d_2, ...] -> [C, B, d_1, d_2, ..., d_k] + target = target.transpose(0, 1) + # [C, B, d_1, d_2, ..., d_k] -> [C, N] + target = target.reshape(target.size(0), -1) + # [C, N] -> [N, C] + target = target.transpose(0, 1).contiguous() + else: + # target with shape [B, d_1, d_2, ...]
+ # transform it's shape into [N, ] + target = target.view(-1).contiguous() + valid_mask = (target != ignore_index).view(-1, 1) + # avoid raising error when using F.one_hot() + target = torch.where(target == ignore_index, target.new_tensor(0), + target) + + reduction = ( + reduction_override if reduction_override else self.reduction) + if self.use_sigmoid: + num_classes = pred.size(1) + if torch.cuda.is_available() and pred.is_cuda: + if target.dim() == 1: + one_hot_target = F.one_hot(target, num_classes=num_classes) + else: + one_hot_target = target + target = target.argmax(dim=1) + valid_mask = (target != ignore_index).view(-1, 1) + calculate_loss_func = sigmoid_focal_loss + else: + one_hot_target = None + if target.dim() == 1: + target = F.one_hot(target, num_classes=num_classes) + else: + valid_mask = (target.argmax(dim=1) != ignore_index).view( + -1, 1) + calculate_loss_func = py_sigmoid_focal_loss + + loss_cls = self.loss_weight * calculate_loss_func( + pred, + target, + one_hot_target, + weight, + gamma=self.gamma, + alpha=self.alpha, + class_weight=self.class_weight, + valid_mask=valid_mask, + reduction=reduction, + avg_factor=avg_factor) + + if reduction == 'none': + # [N, C] -> [C, N] + loss_cls = loss_cls.transpose(0, 1) + # [C, N] -> [C, B, d1, d2, ...] + # original_shape: [B, C, d1, d2, ...] + loss_cls = loss_cls.reshape(original_shape[1], + original_shape[0], + *original_shape[2:]) + # [C, B, d1, d2, ...] -> [B, C, d1, d2, ...] + loss_cls = loss_cls.transpose(0, 1).contiguous() + else: + raise NotImplementedError + return loss_cls + + @property + def loss_name(self): + """Loss Name. + + This function must be implemented and will return the name of this + loss function. This name will be used to combine different loss items + by simple sum operation. In addition, if you want this loss item to be + included into the backward graph, `loss_` must be the prefix of the + name. + Returns: + str: The name of this loss item. + """ + return self._loss_name diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/losses/lovasz_loss.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/losses/lovasz_loss.py new file mode 100644 index 0000000..2bb0fad --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/losses/lovasz_loss.py @@ -0,0 +1,323 @@ +# Copyright (c) OpenMMLab. All rights reserved. +"""Modified from https://github.com/bermanmaxim/LovaszSoftmax/blob/master/pytor +ch/lovasz_losses.py Lovasz-Softmax and Jaccard hinge loss in PyTorch Maxim +Berman 2018 ESAT-PSI KU Leuven (MIT License)""" + +import mmcv +import torch +import torch.nn as nn +import torch.nn.functional as F + +from ..builder import LOSSES +from .utils import get_class_weight, weight_reduce_loss + + +def lovasz_grad(gt_sorted): + """Computes gradient of the Lovasz extension w.r.t sorted errors. + + See Alg. 1 in paper. + """ + p = len(gt_sorted) + gts = gt_sorted.sum() + intersection = gts - gt_sorted.float().cumsum(0) + union = gts + (1 - gt_sorted).float().cumsum(0) + jaccard = 1. 
- intersection / union + if p > 1: # cover 1-pixel case + jaccard[1:p] = jaccard[1:p] - jaccard[0:-1] + return jaccard + + +def flatten_binary_logits(logits, labels, ignore_index=None): + """Flattens predictions in the batch (binary case) Remove labels equal to + 'ignore_index'.""" + logits = logits.view(-1) + labels = labels.view(-1) + if ignore_index is None: + return logits, labels + valid = (labels != ignore_index) + vlogits = logits[valid] + vlabels = labels[valid] + return vlogits, vlabels + + +def flatten_probs(probs, labels, ignore_index=None): + """Flattens predictions in the batch.""" + if probs.dim() == 3: + # assumes output of a sigmoid layer + B, H, W = probs.size() + probs = probs.view(B, 1, H, W) + B, C, H, W = probs.size() + probs = probs.permute(0, 2, 3, 1).contiguous().view(-1, C) # B*H*W, C=P,C + labels = labels.view(-1) + if ignore_index is None: + return probs, labels + valid = (labels != ignore_index) + vprobs = probs[valid.nonzero().squeeze()] + vlabels = labels[valid] + return vprobs, vlabels + + +def lovasz_hinge_flat(logits, labels): + """Binary Lovasz hinge loss. + + Args: + logits (torch.Tensor): [P], logits at each prediction + (between -infty and +infty). + labels (torch.Tensor): [P], binary ground truth labels (0 or 1). + + Returns: + torch.Tensor: The calculated loss. + """ + if len(labels) == 0: + # only void pixels, the gradients should be 0 + return logits.sum() * 0. + signs = 2. * labels.float() - 1. + errors = (1. - logits * signs) + errors_sorted, perm = torch.sort(errors, dim=0, descending=True) + perm = perm.data + gt_sorted = labels[perm] + grad = lovasz_grad(gt_sorted) + loss = torch.dot(F.relu(errors_sorted), grad) + return loss + + +def lovasz_hinge(logits, + labels, + classes='present', + per_image=False, + class_weight=None, + reduction='mean', + avg_factor=None, + ignore_index=255): + """Binary Lovasz hinge loss. + + Args: + logits (torch.Tensor): [B, H, W], logits at each pixel + (between -infty and +infty). + labels (torch.Tensor): [B, H, W], binary ground truth masks (0 or 1). + classes (str | list[int], optional): Placeholder, to be consistent with + other loss. Default: None. + per_image (bool, optional): If per_image is True, compute the loss per + image instead of per batch. Default: False. + class_weight (list[float], optional): Placeholder, to be consistent + with other loss. Default: None. + reduction (str, optional): The method used to reduce the loss. Options + are "none", "mean" and "sum". This parameter only works when + per_image is True. Default: 'mean'. + avg_factor (int, optional): Average factor that is used to average + the loss. This parameter only works when per_image is True. + Default: None. + ignore_index (int | None): The label index to be ignored. Default: 255. + + Returns: + torch.Tensor: The calculated loss. + """ + if per_image: + loss = [ + lovasz_hinge_flat(*flatten_binary_logits( + logit.unsqueeze(0), label.unsqueeze(0), ignore_index)) + for logit, label in zip(logits, labels) + ] + loss = weight_reduce_loss( + torch.stack(loss), None, reduction, avg_factor) + else: + loss = lovasz_hinge_flat( + *flatten_binary_logits(logits, labels, ignore_index)) + return loss + + +def lovasz_softmax_flat(probs, labels, classes='present', class_weight=None): + """Multi-class Lovasz-Softmax loss. + + Args: + probs (torch.Tensor): [P, C], class probabilities at each prediction + (between 0 and 1). + labels (torch.Tensor): [P], ground truth labels (between 0 and C - 1). 
+ classes (str | list[int], optional): Classes chosen to calculate loss. + 'all' for all classes, 'present' for classes present in labels, or + a list of classes to average. Default: 'present'. + class_weight (list[float], optional): The weight for each class. + Default: None. + + Returns: + torch.Tensor: The calculated loss. + """ + if probs.numel() == 0: + # only void pixels, the gradients should be 0 + return probs * 0. + C = probs.size(1) + losses = [] + class_to_sum = list(range(C)) if classes in ['all', 'present'] else classes + for c in class_to_sum: + fg = (labels == c).float() # foreground for class c + if (classes == 'present' and fg.sum() == 0): + continue + if C == 1: + if len(classes) > 1: + raise ValueError('Sigmoid output possible only with 1 class') + class_pred = probs[:, 0] + else: + class_pred = probs[:, c] + errors = (fg - class_pred).abs() + errors_sorted, perm = torch.sort(errors, 0, descending=True) + perm = perm.data + fg_sorted = fg[perm] + loss = torch.dot(errors_sorted, lovasz_grad(fg_sorted)) + if class_weight is not None: + loss *= class_weight[c] + losses.append(loss) + return torch.stack(losses).mean() + + +def lovasz_softmax(probs, + labels, + classes='present', + per_image=False, + class_weight=None, + reduction='mean', + avg_factor=None, + ignore_index=255): + """Multi-class Lovasz-Softmax loss. + + Args: + probs (torch.Tensor): [B, C, H, W], class probabilities at each + prediction (between 0 and 1). + labels (torch.Tensor): [B, H, W], ground truth labels (between 0 and + C - 1). + classes (str | list[int], optional): Classes chosen to calculate loss. + 'all' for all classes, 'present' for classes present in labels, or + a list of classes to average. Default: 'present'. + per_image (bool, optional): If per_image is True, compute the loss per + image instead of per batch. Default: False. + class_weight (list[float], optional): The weight for each class. + Default: None. + reduction (str, optional): The method used to reduce the loss. Options + are "none", "mean" and "sum". This parameter only works when + per_image is True. Default: 'mean'. + avg_factor (int, optional): Average factor that is used to average + the loss. This parameter only works when per_image is True. + Default: None. + ignore_index (int | None): The label index to be ignored. Default: 255. + + Returns: + torch.Tensor: The calculated loss. + """ + + if per_image: + loss = [ + lovasz_softmax_flat( + *flatten_probs( + prob.unsqueeze(0), label.unsqueeze(0), ignore_index), + classes=classes, + class_weight=class_weight) + for prob, label in zip(probs, labels) + ] + loss = weight_reduce_loss( + torch.stack(loss), None, reduction, avg_factor) + else: + loss = lovasz_softmax_flat( + *flatten_probs(probs, labels, ignore_index), + classes=classes, + class_weight=class_weight) + return loss + + +@LOSSES.register_module() +class LovaszLoss(nn.Module): + """LovaszLoss. + + This loss is proposed in `The Lovasz-Softmax loss: A tractable surrogate + for the optimization of the intersection-over-union measure in neural + networks `_. + + Args: + loss_type (str, optional): Binary or multi-class loss. + Default: 'multi_class'. Options are "binary" and "multi_class". + classes (str | list[int], optional): Classes chosen to calculate loss. + 'all' for all classes, 'present' for classes present in labels, or + a list of classes to average. Default: 'present'. + per_image (bool, optional): If per_image is True, compute the loss per + image instead of per batch. Default: False. 
+ reduction (str, optional): The method used to reduce the loss. Options + are "none", "mean" and "sum". This parameter only works when + per_image is True. Default: 'mean'. + class_weight (list[float] | str, optional): Weight of each class. If in + str format, read them from a file. Defaults to None. + loss_weight (float, optional): Weight of the loss. Defaults to 1.0. + loss_name (str, optional): Name of the loss item. If you want this loss + item to be included into the backward graph, `loss_` must be the + prefix of the name. Defaults to 'loss_lovasz'. + """ + + def __init__(self, + loss_type='multi_class', + classes='present', + per_image=False, + reduction='mean', + class_weight=None, + loss_weight=1.0, + loss_name='loss_lovasz'): + super(LovaszLoss, self).__init__() + assert loss_type in ('binary', 'multi_class'), "loss_type should be \ + 'binary' or 'multi_class'." + + if loss_type == 'binary': + self.cls_criterion = lovasz_hinge + else: + self.cls_criterion = lovasz_softmax + assert classes in ('all', 'present') or mmcv.is_list_of(classes, int) + if not per_image: + assert reduction == 'none', "reduction should be 'none' when \ + per_image is False." + + self.classes = classes + self.per_image = per_image + self.reduction = reduction + self.loss_weight = loss_weight + self.class_weight = get_class_weight(class_weight) + self._loss_name = loss_name + + def forward(self, + cls_score, + label, + weight=None, + avg_factor=None, + reduction_override=None, + **kwargs): + """Forward function.""" + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + if self.class_weight is not None: + class_weight = cls_score.new_tensor(self.class_weight) + else: + class_weight = None + + # if multi-class loss, transform logits to probs + if self.cls_criterion == lovasz_softmax: + cls_score = F.softmax(cls_score, dim=1) + + loss_cls = self.loss_weight * self.cls_criterion( + cls_score, + label, + self.classes, + self.per_image, + class_weight=class_weight, + reduction=reduction, + avg_factor=avg_factor, + **kwargs) + return loss_cls + + @property + def loss_name(self): + """Loss Name. + + This function must be implemented and will return the name of this + loss function. This name will be used to combine different loss items + by simple sum operation. In addition, if you want this loss item to be + included into the backward graph, `loss_` must be the prefix of the + name. + Returns: + str: The name of this loss item. + """ + return self._loss_name diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/losses/utils.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/losses/utils.py new file mode 100644 index 0000000..621f57c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/losses/utils.py @@ -0,0 +1,126 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import functools + +import mmcv +import numpy as np +import torch +import torch.nn.functional as F + + +def get_class_weight(class_weight): + """Get class weight for loss function. + + Args: + class_weight (list[float] | str | None): If class_weight is a str, + take it as a file name and read from it. + """ + if isinstance(class_weight, str): + # take it as a file path + if class_weight.endswith('.npy'): + class_weight = np.load(class_weight) + else: + # pkl, json or yaml + class_weight = mmcv.load(class_weight) + + return class_weight + + +def reduce_loss(loss, reduction): + """Reduce loss as specified. 
+ + Args: + loss (Tensor): Elementwise loss tensor. + reduction (str): Options are "none", "mean" and "sum". + + Return: + Tensor: Reduced loss tensor. + """ + reduction_enum = F._Reduction.get_enum(reduction) + # none: 0, elementwise_mean:1, sum: 2 + if reduction_enum == 0: + return loss + elif reduction_enum == 1: + return loss.mean() + elif reduction_enum == 2: + return loss.sum() + + +def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): + """Apply element-wise weight and reduce loss. + + Args: + loss (Tensor): Element-wise loss. + weight (Tensor): Element-wise weights. + reduction (str): Same as built-in losses of PyTorch. + avg_factor (float): Average factor when computing the mean of losses. + + Returns: + Tensor: Processed loss values. + """ + # if weight is specified, apply element-wise weight + if weight is not None: + assert weight.dim() == loss.dim() + if weight.dim() > 1: + assert weight.size(1) == 1 or weight.size(1) == loss.size(1) + loss = loss * weight + + # if avg_factor is not specified, just reduce the loss + if avg_factor is None: + loss = reduce_loss(loss, reduction) + else: + # if reduction is mean, then average the loss by avg_factor + if reduction == 'mean': + # Avoid causing ZeroDivisionError when avg_factor is 0.0, + # i.e., all labels of an image belong to ignore index. + eps = torch.finfo(torch.float32).eps + loss = loss.sum() / (avg_factor + eps) + # if reduction is 'none', then do nothing, otherwise raise an error + elif reduction != 'none': + raise ValueError('avg_factor can not be used with reduction="sum"') + return loss + + +def weighted_loss(loss_func): + """Create a weighted version of a given loss function. + + To use this decorator, the loss function must have the signature like + `loss_func(pred, target, **kwargs)`. The function only needs to compute + element-wise loss without any reduction. This decorator will add weight + and reduction arguments to the function. The decorated function will have + the signature like `loss_func(pred, target, weight=None, reduction='mean', + avg_factor=None, **kwargs)`. + + :Example: + + >>> import torch + >>> @weighted_loss + >>> def l1_loss(pred, target): + >>> return (pred - target).abs() + + >>> pred = torch.Tensor([0, 2, 3]) + >>> target = torch.Tensor([1, 1, 1]) + >>> weight = torch.Tensor([1, 0, 1]) + + >>> l1_loss(pred, target) + tensor(1.3333) + >>> l1_loss(pred, target, weight) + tensor(1.) + >>> l1_loss(pred, target, reduction='none') + tensor([1., 1., 2.]) + >>> l1_loss(pred, target, weight, avg_factor=2) + tensor(1.5000) + """ + + @functools.wraps(loss_func) + def wrapper(pred, + target, + weight=None, + reduction='mean', + avg_factor=None, + **kwargs): + # get element-wise loss + loss = loss_func(pred, target, **kwargs) + loss = weight_reduce_loss(loss, weight, reduction, avg_factor) + return loss + + return wrapper diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/necks/__init__.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/necks/__init__.py new file mode 100644 index 0000000..ff03186 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/necks/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
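The doctest above covers the decorated call signature; as a usage sketch, any new element-wise loss in this package can pick up `weight`/`reduction`/`avg_factor` handling from the decorator. This assumes the `mmseg` package added by this diff is importable; `smooth_l1` is an illustrative name, not part of the diff:

```python
import torch
from mmseg.models.losses.utils import weighted_loss

@weighted_loss
def smooth_l1(pred, target, beta=1.0):
    # element-wise only; weighting and reduction come from the decorator
    diff = (pred - target).abs()
    return torch.where(diff < beta, 0.5 * diff ** 2 / beta, diff - 0.5 * beta)

pred = torch.tensor([0.0, 2.0, 3.0])
target = torch.ones(3)
print(smooth_l1(pred, target))                    # mean -> tensor(0.8333)
print(smooth_l1(pred, target, reduction='none'))  # tensor([0.5000, 0.5000, 1.5000])
print(smooth_l1(pred, target, avg_factor=2))      # sum / (2 + eps) -> ~1.25
```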
+from .featurepyramid import Feature2Pyramid +from .fpn import FPN +from .ic_neck import ICNeck +from .jpu import JPU +from .mla_neck import MLANeck +from .multilevel_neck import MultiLevelNeck + +__all__ = [ + 'FPN', 'MultiLevelNeck', 'MLANeck', 'ICNeck', 'JPU', 'Feature2Pyramid' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/necks/featurepyramid.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/necks/featurepyramid.py new file mode 100644 index 0000000..82a00ce --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/necks/featurepyramid.py @@ -0,0 +1,67 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn +from mmcv.cnn import build_norm_layer + +from ..builder import NECKS + + +@NECKS.register_module() +class Feature2Pyramid(nn.Module): + """Feature2Pyramid. + + A neck structure that connects the ViT backbone and decoder_heads. + + Args: + embed_dim (int): Embedding dimension. + rescales (list[float]): Different sampling multiples used + to obtain pyramid features. Default: [4, 2, 1, 0.5]. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='SyncBN', requires_grad=True). + """ + + def __init__(self, + embed_dim, + rescales=[4, 2, 1, 0.5], + norm_cfg=dict(type='SyncBN', requires_grad=True)): + super(Feature2Pyramid, self).__init__() + self.rescales = rescales + self.upsample_4x = None + for k in self.rescales: + if k == 4: + self.upsample_4x = nn.Sequential( + nn.ConvTranspose2d( + embed_dim, embed_dim, kernel_size=2, stride=2), + build_norm_layer(norm_cfg, embed_dim)[1], + nn.GELU(), + nn.ConvTranspose2d( + embed_dim, embed_dim, kernel_size=2, stride=2), + ) + elif k == 2: + self.upsample_2x = nn.Sequential( + nn.ConvTranspose2d( + embed_dim, embed_dim, kernel_size=2, stride=2)) + elif k == 1: + self.identity = nn.Identity() + elif k == 0.5: + self.downsample_2x = nn.MaxPool2d(kernel_size=2, stride=2) + elif k == 0.25: + self.downsample_4x = nn.MaxPool2d(kernel_size=4, stride=4) + else: + raise KeyError(f'invalid {k} for feature2pyramid') + + def forward(self, inputs): + assert len(inputs) == len(self.rescales) + outputs = [] + if self.upsample_4x is not None: + ops = [ + self.upsample_4x, self.upsample_2x, self.identity, + self.downsample_2x + ] + else: + ops = [ + self.upsample_2x, self.identity, self.downsample_2x, + self.downsample_4x + ] + for i in range(len(inputs)): + outputs.append(ops[i](inputs[i])) + return tuple(outputs) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/necks/fpn.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/necks/fpn.py new file mode 100644 index 0000000..6997de9 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/necks/fpn.py @@ -0,0 +1,213 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import ConvModule +from mmcv.runner import BaseModule, auto_fp16 + +from mmseg.ops import resize +from ..builder import NECKS + + +@NECKS.register_module() +class FPN(BaseModule): + """Feature Pyramid Network. + + This neck is the implementation of `Feature Pyramid Networks for Object + Detection <https://arxiv.org/abs/1612.03144>`_. + + Args: + in_channels (list[int]): Number of input channels per scale. + out_channels (int): Number of output channels (used at each scale). + num_outs (int): Number of output scales. + start_level (int): Index of the start input backbone level used to + build the feature pyramid. Default: 0.
+ end_level (int): Index of the end input backbone level (exclusive) to + build the feature pyramid. Default: -1, which means the last level. + add_extra_convs (bool | str): If bool, it decides whether to add conv + layers on top of the original feature maps. Default to False. + If True, its actual mode is specified by `extra_convs_on_inputs`. + If str, it specifies the source feature map of the extra convs. + Only the following options are allowed + + - 'on_input': Last feat map of neck inputs (i.e. backbone feature). + - 'on_lateral': Last feature map after lateral convs. + - 'on_output': The last output feature map after fpn convs. + extra_convs_on_inputs (bool, deprecated): Whether to apply extra convs + on the original feature from the backbone. If True, + it is equivalent to `add_extra_convs='on_input'`. If False, it is + equivalent to set `add_extra_convs='on_output'`. Default to True. + relu_before_extra_convs (bool): Whether to apply relu before the extra + conv. Default: False. + no_norm_on_lateral (bool): Whether to apply norm on lateral. + Default: False. + conv_cfg (dict): Config dict for convolution layer. Default: None. + norm_cfg (dict): Config dict for normalization layer. Default: None. + act_cfg (dict): Config dict for activation layer in ConvModule. + Default: None. + upsample_cfg (dict): Config dict for interpolate layer. + Default: dict(mode='nearest'). + init_cfg (dict or list[dict], optional): Initialization config dict. + + Example: + >>> import torch + >>> in_channels = [2, 3, 5, 7] + >>> scales = [340, 170, 84, 43] + >>> inputs = [torch.rand(1, c, s, s) + ... for c, s in zip(in_channels, scales)] + >>> self = FPN(in_channels, 11, len(in_channels)).eval() + >>> outputs = self.forward(inputs) + >>> for i in range(len(outputs)): + ... 
print(f'outputs[{i}].shape = {outputs[i].shape}') + outputs[0].shape = torch.Size([1, 11, 340, 340]) + outputs[1].shape = torch.Size([1, 11, 170, 170]) + outputs[2].shape = torch.Size([1, 11, 84, 84]) + outputs[3].shape = torch.Size([1, 11, 43, 43]) + """ + + def __init__(self, + in_channels, + out_channels, + num_outs, + start_level=0, + end_level=-1, + add_extra_convs=False, + extra_convs_on_inputs=False, + relu_before_extra_convs=False, + no_norm_on_lateral=False, + conv_cfg=None, + norm_cfg=None, + act_cfg=None, + upsample_cfg=dict(mode='nearest'), + init_cfg=dict( + type='Xavier', layer='Conv2d', distribution='uniform')): + super(FPN, self).__init__(init_cfg) + assert isinstance(in_channels, list) + self.in_channels = in_channels + self.out_channels = out_channels + self.num_ins = len(in_channels) + self.num_outs = num_outs + self.relu_before_extra_convs = relu_before_extra_convs + self.no_norm_on_lateral = no_norm_on_lateral + self.fp16_enabled = False + self.upsample_cfg = upsample_cfg.copy() + + if end_level == -1: + self.backbone_end_level = self.num_ins + assert num_outs >= self.num_ins - start_level + else: + # if end_level < inputs, no extra level is allowed + self.backbone_end_level = end_level + assert end_level <= len(in_channels) + assert num_outs == end_level - start_level + self.start_level = start_level + self.end_level = end_level + self.add_extra_convs = add_extra_convs + assert isinstance(add_extra_convs, (str, bool)) + if isinstance(add_extra_convs, str): + # Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output' + assert add_extra_convs in ('on_input', 'on_lateral', 'on_output') + elif add_extra_convs: # True + if extra_convs_on_inputs: + # For compatibility with previous release + # TODO: deprecate `extra_convs_on_inputs` + self.add_extra_convs = 'on_input' + else: + self.add_extra_convs = 'on_output' + + self.lateral_convs = nn.ModuleList() + self.fpn_convs = nn.ModuleList() + + for i in range(self.start_level, self.backbone_end_level): + l_conv = ConvModule( + in_channels[i], + out_channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg if not self.no_norm_on_lateral else None, + act_cfg=act_cfg, + inplace=False) + fpn_conv = ConvModule( + out_channels, + out_channels, + 3, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + inplace=False) + + self.lateral_convs.append(l_conv) + self.fpn_convs.append(fpn_conv) + + # add extra conv layers (e.g., RetinaNet) + extra_levels = num_outs - self.backbone_end_level + self.start_level + if self.add_extra_convs and extra_levels >= 1: + for i in range(extra_levels): + if i == 0 and self.add_extra_convs == 'on_input': + in_channels = self.in_channels[self.backbone_end_level - 1] + else: + in_channels = out_channels + extra_fpn_conv = ConvModule( + in_channels, + out_channels, + 3, + stride=2, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + inplace=False) + self.fpn_convs.append(extra_fpn_conv) + + @auto_fp16() + def forward(self, inputs): + assert len(inputs) == len(self.in_channels) + + # build laterals + laterals = [ + lateral_conv(inputs[i + self.start_level]) + for i, lateral_conv in enumerate(self.lateral_convs) + ] + + # build top-down path + used_backbone_levels = len(laterals) + for i in range(used_backbone_levels - 1, 0, -1): + # In some cases, fixing `scale factor` (e.g. 2) is preferred, but + # it cannot co-exist with `size` in `F.interpolate`. 
+ if 'scale_factor' in self.upsample_cfg: + laterals[i - 1] = laterals[i - 1] + resize( + laterals[i], **self.upsample_cfg) + else: + prev_shape = laterals[i - 1].shape[2:] + laterals[i - 1] = laterals[i - 1] + resize( + laterals[i], size=prev_shape, **self.upsample_cfg) + + # build outputs + # part 1: from original levels + outs = [ + self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels) + ] + # part 2: add extra levels + if self.num_outs > len(outs): + # use max pool to get more levels on top of outputs + # (e.g., Faster R-CNN, Mask R-CNN) + if not self.add_extra_convs: + for i in range(self.num_outs - used_backbone_levels): + outs.append(F.max_pool2d(outs[-1], 1, stride=2)) + # add conv layers on top of original feature maps (RetinaNet) + else: + if self.add_extra_convs == 'on_input': + extra_source = inputs[self.backbone_end_level - 1] + elif self.add_extra_convs == 'on_lateral': + extra_source = laterals[-1] + elif self.add_extra_convs == 'on_output': + extra_source = outs[-1] + else: + raise NotImplementedError + outs.append(self.fpn_convs[used_backbone_levels](extra_source)) + for i in range(used_backbone_levels + 1, self.num_outs): + if self.relu_before_extra_convs: + outs.append(self.fpn_convs[i](F.relu(outs[-1]))) + else: + outs.append(self.fpn_convs[i](outs[-1])) + return tuple(outs) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/necks/ic_neck.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/necks/ic_neck.py new file mode 100644 index 0000000..a5d81ce --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/necks/ic_neck.py @@ -0,0 +1,148 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn.functional as F +from mmcv.cnn import ConvModule +from mmcv.runner import BaseModule + +from mmseg.ops import resize +from ..builder import NECKS + + +class CascadeFeatureFusion(BaseModule): + """Cascade Feature Fusion Unit in ICNet. + + Args: + low_channels (int): The number of input channels for + low resolution feature map. + high_channels (int): The number of input channels for + high resolution feature map. + out_channels (int): The number of output channels. + conv_cfg (dict): Dictionary to construct and config conv layer. + Default: None. + norm_cfg (dict): Dictionary to construct and config norm layer. + Default: dict(type='BN'). + act_cfg (dict): Dictionary to construct and config act layer. + Default: dict(type='ReLU'). + align_corners (bool): align_corners argument of F.interpolate. + Default: False. + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + + Returns: + x (Tensor): The output tensor of shape (N, out_channels, H, W). + x_low (Tensor): The output tensor of shape (N, out_channels, H, W) + for Cascade Label Guidance in auxiliary heads. 
+ """ + + def __init__(self, + low_channels, + high_channels, + out_channels, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + align_corners=False, + init_cfg=None): + super(CascadeFeatureFusion, self).__init__(init_cfg=init_cfg) + self.align_corners = align_corners + self.conv_low = ConvModule( + low_channels, + out_channels, + 3, + padding=2, + dilation=2, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.conv_high = ConvModule( + high_channels, + out_channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + def forward(self, x_low, x_high): + x_low = resize( + x_low, + size=x_high.size()[2:], + mode='bilinear', + align_corners=self.align_corners) + # Note: Different from original paper, `x_low` is underwent + # `self.conv_low` rather than another 1x1 conv classifier + # before being used for auxiliary head. + x_low = self.conv_low(x_low) + x_high = self.conv_high(x_high) + x = x_low + x_high + x = F.relu(x, inplace=True) + return x, x_low + + +@NECKS.register_module() +class ICNeck(BaseModule): + """ICNet for Real-Time Semantic Segmentation on High-Resolution Images. + + This head is the implementation of `ICHead + `_. + + Args: + in_channels (int): The number of input image channels. Default: 3. + out_channels (int): The numbers of output feature channels. + Default: 128. + conv_cfg (dict): Dictionary to construct and config conv layer. + Default: None. + norm_cfg (dict): Dictionary to construct and config norm layer. + Default: dict(type='BN'). + act_cfg (dict): Dictionary to construct and config act layer. + Default: dict(type='ReLU'). + align_corners (bool): align_corners argument of F.interpolate. + Default: False. + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + """ + + def __init__(self, + in_channels=(64, 256, 256), + out_channels=128, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + align_corners=False, + init_cfg=None): + super(ICNeck, self).__init__(init_cfg=init_cfg) + assert len(in_channels) == 3, 'Length of input channels \ + must be 3!' + + self.in_channels = in_channels + self.out_channels = out_channels + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.align_corners = align_corners + self.cff_24 = CascadeFeatureFusion( + self.in_channels[2], + self.in_channels[1], + self.out_channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=self.align_corners) + + self.cff_12 = CascadeFeatureFusion( + self.out_channels, + self.in_channels[0], + self.out_channels, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg, + align_corners=self.align_corners) + + def forward(self, inputs): + assert len(inputs) == 3, 'Length of input feature \ + maps must be 3!' + + x_sub1, x_sub2, x_sub4 = inputs + x_cff_24, x_24 = self.cff_24(x_sub4, x_sub2) + x_cff_12, x_12 = self.cff_12(x_cff_24, x_sub1) + # Note: `x_cff_12` is used for decode_head, + # `x_24` and `x_12` are used for auxiliary head. + return x_24, x_12, x_cff_12 diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/necks/jpu.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/necks/jpu.py new file mode 100644 index 0000000..3cc6b9f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/necks/jpu.py @@ -0,0 +1,131 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import torch +import torch.nn as nn +from mmcv.cnn import ConvModule, DepthwiseSeparableConvModule +from mmcv.runner import BaseModule + +from mmseg.ops import resize +from ..builder import NECKS + + +@NECKS.register_module() +class JPU(BaseModule): + """FastFCN: Rethinking Dilated Convolution in the Backbone + for Semantic Segmentation. + + This Joint Pyramid Upsampling (JPU) neck is the implementation of + `FastFCN `_. + + Args: + in_channels (Tuple[int], optional): The number of input channels + for each convolution operations before upsampling. + Default: (512, 1024, 2048). + mid_channels (int): The number of output channels of JPU. + Default: 512. + start_level (int): Index of the start input backbone level used to + build the feature pyramid. Default: 0. + end_level (int): Index of the end input backbone level (exclusive) to + build the feature pyramid. Default: -1, which means the last level. + dilations (tuple[int]): Dilation rate of each Depthwise + Separable ConvModule. Default: (1, 2, 4, 8). + align_corners (bool, optional): The align_corners argument of + resize operation. Default: False. + conv_cfg (dict | None): Config of conv layers. + Default: None. + norm_cfg (dict | None): Config of norm layers. + Default: dict(type='BN'). + act_cfg (dict): Config of activation layers. + Default: dict(type='ReLU'). + init_cfg (dict or list[dict], optional): Initialization config dict. + Default: None. + """ + + def __init__(self, + in_channels=(512, 1024, 2048), + mid_channels=512, + start_level=0, + end_level=-1, + dilations=(1, 2, 4, 8), + align_corners=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + init_cfg=None): + super(JPU, self).__init__(init_cfg=init_cfg) + assert isinstance(in_channels, tuple) + assert isinstance(dilations, tuple) + self.in_channels = in_channels + self.mid_channels = mid_channels + self.start_level = start_level + self.num_ins = len(in_channels) + if end_level == -1: + self.backbone_end_level = self.num_ins + else: + self.backbone_end_level = end_level + assert end_level <= len(in_channels) + + self.dilations = dilations + self.align_corners = align_corners + + self.conv_layers = nn.ModuleList() + self.dilation_layers = nn.ModuleList() + for i in range(self.start_level, self.backbone_end_level): + conv_layer = nn.Sequential( + ConvModule( + self.in_channels[i], + self.mid_channels, + kernel_size=3, + padding=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + self.conv_layers.append(conv_layer) + for i in range(len(dilations)): + dilation_layer = nn.Sequential( + DepthwiseSeparableConvModule( + in_channels=(self.backbone_end_level - self.start_level) * + self.mid_channels, + out_channels=self.mid_channels, + kernel_size=3, + stride=1, + padding=dilations[i], + dilation=dilations[i], + dw_norm_cfg=norm_cfg, + dw_act_cfg=None, + pw_norm_cfg=norm_cfg, + pw_act_cfg=act_cfg)) + self.dilation_layers.append(dilation_layer) + + def forward(self, inputs): + """Forward function.""" + assert len(inputs) == len(self.in_channels), 'Length of inputs must \ + be the same with self.in_channels!' 
+ + feats = [ + self.conv_layers[i - self.start_level](inputs[i]) + for i in range(self.start_level, self.backbone_end_level) + ] + + h, w = feats[0].shape[2:] + for i in range(1, len(feats)): + feats[i] = resize( + feats[i], + size=(h, w), + mode='bilinear', + align_corners=self.align_corners) + + feat = torch.cat(feats, dim=1) + concat_feat = torch.cat([ + self.dilation_layers[i](feat) for i in range(len(self.dilations)) + ], + dim=1) + + outs = [] + + # Default: outs[2] is the output of JPU for decoder head, outs[1] is + # the feature map from backbone for auxiliary head. Additionally, + # outs[0] can also be used for auxiliary head. + for i in range(self.start_level, self.backbone_end_level - 1): + outs.append(inputs[i]) + outs.append(concat_feat) + return tuple(outs) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/necks/mla_neck.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/necks/mla_neck.py new file mode 100644 index 0000000..1513e29 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/necks/mla_neck.py @@ -0,0 +1,118 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn +from mmcv.cnn import ConvModule, build_norm_layer + +from ..builder import NECKS + + +class MLAModule(nn.Module): + + def __init__(self, + in_channels=[1024, 1024, 1024, 1024], + out_channels=256, + norm_cfg=None, + act_cfg=None): + super(MLAModule, self).__init__() + self.channel_proj = nn.ModuleList() + for i in range(len(in_channels)): + self.channel_proj.append( + ConvModule( + in_channels=in_channels[i], + out_channels=out_channels, + kernel_size=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + self.feat_extract = nn.ModuleList() + for i in range(len(in_channels)): + self.feat_extract.append( + ConvModule( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + padding=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + + def forward(self, inputs): + + # feat_list -> [p2, p3, p4, p5] + feat_list = [] + for x, conv in zip(inputs, self.channel_proj): + feat_list.append(conv(x)) + + # feat_list -> [p5, p4, p3, p2] + # mid_list -> [m5, m4, m3, m2] + feat_list = feat_list[::-1] + mid_list = [] + for feat in feat_list: + if len(mid_list) == 0: + mid_list.append(feat) + else: + mid_list.append(mid_list[-1] + feat) + + # mid_list -> [m5, m4, m3, m2] + # out_list -> [o2, o3, o4, o5] + out_list = [] + for mid, conv in zip(mid_list, self.feat_extract): + out_list.append(conv(mid)) + + return tuple(out_list) + + +@NECKS.register_module() +class MLANeck(nn.Module): + """Multi-level Feature Aggregation. + + This neck is `The Multi-level Feature Aggregation construction of + SETR `_. + + + Args: + in_channels (List[int]): Number of input channels per scale. + out_channels (int): Number of output channels (used at each scale). + norm_layer (dict): Config dict for input normalization. + Default: norm_layer=dict(type='LN', eps=1e-6, requires_grad=True). + norm_cfg (dict): Config dict for normalization layer. Default: None. + act_cfg (dict): Config dict for activation layer in ConvModule. + Default: None. + """ + + def __init__(self, + in_channels, + out_channels, + norm_layer=dict(type='LN', eps=1e-6, requires_grad=True), + norm_cfg=None, + act_cfg=None): + super(MLANeck, self).__init__() + assert isinstance(in_channels, list) + self.in_channels = in_channels + self.out_channels = out_channels + + # In order to build general vision transformer backbone, we have to + # move MLA to neck. 
+ self.norm = nn.ModuleList([ + build_norm_layer(norm_layer, in_channels[i])[1] + for i in range(len(in_channels)) + ]) + + self.mla = MLAModule( + in_channels=in_channels, + out_channels=out_channels, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + def forward(self, inputs): + assert len(inputs) == len(self.in_channels) + + # Convert from nchw to nlc + outs = [] + for i in range(len(inputs)): + x = inputs[i] + n, c, h, w = x.shape + x = x.reshape(n, c, h * w).transpose(2, 1).contiguous() + x = self.norm[i](x) + x = x.transpose(1, 2).reshape(n, c, h, w).contiguous() + outs.append(x) + + outs = self.mla(outs) + return tuple(outs) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/necks/multilevel_neck.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/necks/multilevel_neck.py new file mode 100644 index 0000000..5151f87 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/necks/multilevel_neck.py @@ -0,0 +1,78 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch.nn as nn +from mmcv.cnn import ConvModule, xavier_init + +from mmseg.ops import resize +from ..builder import NECKS + + +@NECKS.register_module() +class MultiLevelNeck(nn.Module): + """MultiLevelNeck. + + A neck structure connect vit backbone and decoder_heads. + + Args: + in_channels (List[int]): Number of input channels per scale. + out_channels (int): Number of output channels (used at each scale). + scales (List[float]): Scale factors for each input feature map. + Default: [0.5, 1, 2, 4] + norm_cfg (dict): Config dict for normalization layer. Default: None. + act_cfg (dict): Config dict for activation layer in ConvModule. + Default: None. + """ + + def __init__(self, + in_channels, + out_channels, + scales=[0.5, 1, 2, 4], + norm_cfg=None, + act_cfg=None): + super(MultiLevelNeck, self).__init__() + assert isinstance(in_channels, list) + self.in_channels = in_channels + self.out_channels = out_channels + self.scales = scales + self.num_outs = len(scales) + self.lateral_convs = nn.ModuleList() + self.convs = nn.ModuleList() + for in_channel in in_channels: + self.lateral_convs.append( + ConvModule( + in_channel, + out_channels, + kernel_size=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + for _ in range(self.num_outs): + self.convs.append( + ConvModule( + out_channels, + out_channels, + kernel_size=3, + padding=1, + stride=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + + # default init_weights for conv(msra) and norm in ConvModule + def init_weights(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + xavier_init(m, distribution='uniform') + + def forward(self, inputs): + assert len(inputs) == len(self.in_channels) + inputs = [ + lateral_conv(inputs[i]) + for i, lateral_conv in enumerate(self.lateral_convs) + ] + # for len(inputs) not equal to self.num_outs + if len(inputs) == 1: + inputs = [inputs[0] for _ in range(self.num_outs)] + outs = [] + for i in range(self.num_outs): + x_resize = resize( + inputs[i], scale_factor=self.scales[i], mode='bilinear') + outs.append(self.convs[i](x_resize)) + return tuple(outs) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/segmentors/__init__.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/segmentors/__init__.py new file mode 100644 index 0000000..387c858 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/segmentors/__init__.py @@ -0,0 +1,6 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
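A matching shape sketch for `MultiLevelNeck` (same importability assumption): four equal-sized ViT feature maps go in, and the `scales=[0.5, 1, 2, 4]` resizes produce a pyramid before the 3x3 convs:

```python
import torch
from mmseg.models.necks import MultiLevelNeck

neck = MultiLevelNeck(in_channels=[768, 768, 768, 768], out_channels=256).eval()
feats = [torch.rand(1, 768, 32, 32) for _ in range(4)]
with torch.no_grad():
    outs = neck(feats)
print([tuple(o.shape) for o in outs])
# [(1, 256, 16, 16), (1, 256, 32, 32), (1, 256, 64, 64), (1, 256, 128, 128)]
```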
+from .base import BaseSegmentor +from .cascade_encoder_decoder import CascadeEncoderDecoder +from .encoder_decoder import EncoderDecoder + +__all__ = ['BaseSegmentor', 'EncoderDecoder', 'CascadeEncoderDecoder'] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/segmentors/base.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/segmentors/base.py new file mode 100644 index 0000000..63ff009 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/segmentors/base.py @@ -0,0 +1,293 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings +from abc import ABCMeta, abstractmethod +from collections import OrderedDict + +import mmcv +import numpy as np +import torch +import torch.distributed as dist +from mmcv.runner import BaseModule, auto_fp16 + + +class BaseSegmentor(BaseModule, metaclass=ABCMeta): + """Base class for segmentors.""" + + def __init__(self, init_cfg=None): + super(BaseSegmentor, self).__init__(init_cfg) + self.fp16_enabled = False + + @property + def with_neck(self): + """bool: whether the segmentor has neck""" + return hasattr(self, 'neck') and self.neck is not None + + @property + def with_auxiliary_head(self): + """bool: whether the segmentor has auxiliary head""" + return hasattr(self, + 'auxiliary_head') and self.auxiliary_head is not None + + @property + def with_decode_head(self): + """bool: whether the segmentor has decode head""" + return hasattr(self, 'decode_head') and self.decode_head is not None + + @abstractmethod + def extract_feat(self, imgs): + """Placeholder for extract features from images.""" + pass + + @abstractmethod + def encode_decode(self, img, img_metas): + """Placeholder for encode images with backbone and decode into a + semantic segmentation map of the same size as input.""" + pass + + @abstractmethod + def forward_train(self, imgs, img_metas, **kwargs): + """Placeholder for Forward function for training.""" + pass + + @abstractmethod + def simple_test(self, img, img_meta, **kwargs): + """Placeholder for single image test.""" + pass + + @abstractmethod + def aug_test(self, imgs, img_metas, **kwargs): + """Placeholder for augmentation test.""" + pass + + def forward_test(self, imgs, img_metas, **kwargs): + """ + Args: + imgs (List[Tensor]): the outer list indicates test-time + augmentations and inner Tensor should have a shape NxCxHxW, + which contains all images in the batch. + img_metas (List[List[dict]]): the outer list indicates test-time + augs (multiscale, flip, etc.) and the inner list indicates + images in a batch. 
+ """ + for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]: + if not isinstance(var, list): + raise TypeError(f'{name} must be a list, but got ' + f'{type(var)}') + + num_augs = len(imgs) + if num_augs != len(img_metas): + raise ValueError(f'num of augmentations ({len(imgs)}) != ' + f'num of image meta ({len(img_metas)})') + # all images in the same aug batch all of the same ori_shape and pad + # shape + for img_meta in img_metas: + ori_shapes = [_['ori_shape'] for _ in img_meta] + assert all(shape == ori_shapes[0] for shape in ori_shapes) + img_shapes = [_['img_shape'] for _ in img_meta] + assert all(shape == img_shapes[0] for shape in img_shapes) + pad_shapes = [_['pad_shape'] for _ in img_meta] + assert all(shape == pad_shapes[0] for shape in pad_shapes) + + if num_augs == 1: + return self.simple_test(imgs[0], img_metas[0], **kwargs) + else: + return self.aug_test(imgs, img_metas, **kwargs) + + @auto_fp16(apply_to=('img', )) + def forward(self, img, img_metas, return_loss=True, **kwargs): + """Calls either :func:`forward_train` or :func:`forward_test` depending + on whether ``return_loss`` is ``True``. + + Note this setting will change the expected inputs. When + ``return_loss=True``, img and img_meta are single-nested (i.e. Tensor + and List[dict]), and when ``resturn_loss=False``, img and img_meta + should be double nested (i.e. List[Tensor], List[List[dict]]), with + the outer list indicating test time augmentations. + """ + if return_loss: + return self.forward_train(img, img_metas, **kwargs) + else: + return self.forward_test(img, img_metas, **kwargs) + + def train_step(self, data_batch, optimizer, **kwargs): + """The iteration step during training. + + This method defines an iteration step during training, except for the + back propagation and optimizer updating, which are done in an optimizer + hook. Note that in some complicated cases or models, the whole process + including back propagation and optimizer updating is also defined in + this method, such as GAN. + + Args: + data (dict): The output of dataloader. + optimizer (:obj:`torch.optim.Optimizer` | dict): The optimizer of + runner is passed to ``train_step()``. This argument is unused + and reserved. + + Returns: + dict: It should contain at least 3 keys: ``loss``, ``log_vars``, + ``num_samples``. + ``loss`` is a tensor for back propagation, which can be a + weighted sum of multiple losses. + ``log_vars`` contains all the variables to be sent to the + logger. + ``num_samples`` indicates the batch size (when the model is + DDP, it means the batch size on each GPU), which is used for + averaging the logs. + """ + losses = self(**data_batch) + loss, log_vars = self._parse_losses(losses) + + outputs = dict( + loss=loss, + log_vars=log_vars, + num_samples=len(data_batch['img_metas'])) + + return outputs + + def val_step(self, data_batch, optimizer=None, **kwargs): + """The iteration step during validation. + + This method shares the same signature as :func:`train_step`, but used + during val epochs. Note that the evaluation after training epochs is + not implemented with this method, but an evaluation hook. 
+ """ + losses = self(**data_batch) + loss, log_vars = self._parse_losses(losses) + + log_vars_ = dict() + for loss_name, loss_value in log_vars.items(): + k = loss_name + '_val' + log_vars_[k] = loss_value + + outputs = dict( + loss=loss, + log_vars=log_vars_, + num_samples=len(data_batch['img_metas'])) + + return outputs + + @staticmethod + def _parse_losses(losses): + """Parse the raw outputs (losses) of the network. + + Args: + losses (dict): Raw output of the network, which usually contain + losses and other necessary information. + + Returns: + tuple[Tensor, dict]: (loss, log_vars), loss is the loss tensor + which may be a weighted sum of all losses, log_vars contains + all the variables to be sent to the logger. + """ + log_vars = OrderedDict() + for loss_name, loss_value in losses.items(): + if isinstance(loss_value, torch.Tensor): + log_vars[loss_name] = loss_value.mean() + elif isinstance(loss_value, list): + log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value) + else: + raise TypeError( + f'{loss_name} is not a tensor or list of tensors') + + loss = sum(_value for _key, _value in log_vars.items() + if 'loss' in _key) + + # If the loss_vars has different length, raise assertion error + # to prevent GPUs from infinite waiting. + if dist.is_available() and dist.is_initialized(): + log_var_length = torch.tensor(len(log_vars), device=loss.device) + dist.all_reduce(log_var_length) + message = (f'rank {dist.get_rank()}' + + f' len(log_vars): {len(log_vars)}' + ' keys: ' + + ','.join(log_vars.keys()) + '\n') + assert log_var_length == len(log_vars) * dist.get_world_size(), \ + 'loss log variables are different across GPUs!\n' + message + + log_vars['loss'] = loss + for loss_name, loss_value in log_vars.items(): + # reduce loss when distributed training + if dist.is_available() and dist.is_initialized(): + loss_value = loss_value.data.clone() + dist.all_reduce(loss_value.div_(dist.get_world_size())) + log_vars[loss_name] = loss_value.item() + + return loss, log_vars + + def show_result(self, + img, + result, + palette=None, + win_name='', + show=False, + wait_time=0, + out_file=None, + opacity=0.5): + """Draw `result` over `img`. + + Args: + img (str or Tensor): The image to be displayed. + result (Tensor): The semantic segmentation results to draw over + `img`. + palette (list[list[int]]] | np.ndarray | None): The palette of + segmentation map. If None is given, random palette will be + generated. Default: None + win_name (str): The window name. + wait_time (int): Value of waitKey param. + Default: 0. + show (bool): Whether to show the image. + Default: False. + out_file (str or None): The filename to write the image. + Default: None. + opacity(float): Opacity of painted segmentation map. + Default 0.5. + Must be in (0, 1] range. + Returns: + img (Tensor): Only if not `show` or `out_file` + """ + ############################################################################################################################# + #img = np.zeros((512, 512, 3)) + img = mmcv.imread(img) + img = img.copy() + seg = result[0] + if palette is None: + if self.PALETTE is None: + # Get random state before set seed, + # and restore random state later. + # It will prevent loss of randomness, as the palette + # may be different in each iteration if not specified. 
+ # See: https://github.com/open-mmlab/mmdetection/issues/5844 + state = np.random.get_state() + np.random.seed(42) + # random palette + palette = np.random.randint( + 0, 255, size=(len(self.CLASSES), 3)) + np.random.set_state(state) + else: + palette = self.PALETTE + palette = np.array(palette) + assert palette.shape[0] == len(self.CLASSES) + assert palette.shape[1] == 3 + assert len(palette.shape) == 2 + assert 0 < opacity <= 1.0 + color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8) + for label, color in enumerate(palette): + color_seg[seg == label, :] = color + # convert to BGR + color_seg = color_seg[..., ::-1] + + img = img * (1 - opacity) + color_seg * opacity + img = img.astype(np.uint8) + # if out_file specified, do not show image in window + if out_file is not None: + show = False + + if show: + mmcv.imshow(img, win_name, wait_time) + if out_file is not None: + mmcv.imwrite(img, out_file) + + if not (show or out_file): + warnings.warn('show==False and out_file is not specified, only ' + 'result image will be returned') + return img diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/segmentors/cascade_encoder_decoder.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/segmentors/cascade_encoder_decoder.py new file mode 100644 index 0000000..1913a22 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/segmentors/cascade_encoder_decoder.py @@ -0,0 +1,88 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from torch import nn + +from mmseg.core import add_prefix +from mmseg.ops import resize +from .. import builder +from ..builder import SEGMENTORS +from .encoder_decoder import EncoderDecoder + + +@SEGMENTORS.register_module() +class CascadeEncoderDecoder(EncoderDecoder): + """Cascade Encoder Decoder segmentors. + + CascadeEncoderDecoder almost the same as EncoderDecoder, while decoders of + CascadeEncoderDecoder are cascaded. The output of previous decoder_head + will be the input of next decoder_head. 
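A config-level sketch of that cascaded wiring, as it would appear in an mmseg config (head types and channel numbers are illustrative, assuming FCNHead/OCRHead are registered in this build; `decode_head` must be a list whose length equals `num_stages`):

```python
model = dict(
    type='CascadeEncoderDecoder',
    num_stages=2,
    backbone=dict(
        type='ResNetV1c', depth=50, num_stages=4, out_indices=(0, 1, 2, 3)),
    decode_head=[
        # stage 0: coarse prediction
        dict(type='FCNHead', in_channels=1024, in_index=2,
             channels=256, num_classes=19),
        # stage 1: refines stage 0's output (the prev_outputs below)
        dict(type='OCRHead', in_channels=2048, in_index=3,
             channels=512, ocr_channels=256, num_classes=19),
    ],
)
```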
+ """ + + def __init__(self, + num_stages, + backbone, + decode_head, + neck=None, + auxiliary_head=None, + train_cfg=None, + test_cfg=None, + pretrained=None, + init_cfg=None): + self.num_stages = num_stages + super(CascadeEncoderDecoder, self).__init__( + backbone=backbone, + decode_head=decode_head, + neck=neck, + auxiliary_head=auxiliary_head, + train_cfg=train_cfg, + test_cfg=test_cfg, + pretrained=pretrained, + init_cfg=init_cfg) + + def _init_decode_head(self, decode_head): + """Initialize ``decode_head``""" + assert isinstance(decode_head, list) + assert len(decode_head) == self.num_stages + self.decode_head = nn.ModuleList() + for i in range(self.num_stages): + self.decode_head.append(builder.build_head(decode_head[i])) + self.align_corners = self.decode_head[-1].align_corners + self.num_classes = self.decode_head[-1].num_classes + + def encode_decode(self, img, img_metas): + """Encode images with backbone and decode into a semantic segmentation + map of the same size as input.""" + x = self.extract_feat(img) + out = self.decode_head[0].forward_test(x, img_metas, self.test_cfg) + for i in range(1, self.num_stages): + out = self.decode_head[i].forward_test(x, out, img_metas, + self.test_cfg) + out = resize( + input=out, + size=img.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + return out + + def _decode_head_forward_train(self, x, img_metas, gt_semantic_seg): + """Run forward function and calculate loss for decode head in + training.""" + losses = dict() + + loss_decode = self.decode_head[0].forward_train( + x, img_metas, gt_semantic_seg, self.train_cfg) + + losses.update(add_prefix(loss_decode, 'decode_0')) + + for i in range(1, self.num_stages): + # forward test again, maybe unnecessary for most methods. + if i == 1: + prev_outputs = self.decode_head[0].forward_test( + x, img_metas, self.test_cfg) + else: + prev_outputs = self.decode_head[i - 1].forward_test( + x, prev_outputs, img_metas, self.test_cfg) + loss_decode = self.decode_head[i].forward_train( + x, prev_outputs, img_metas, gt_semantic_seg, self.train_cfg) + losses.update(add_prefix(loss_decode, f'decode_{i}')) + + return losses diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/segmentors/encoder_decoder.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/segmentors/encoder_decoder.py new file mode 100644 index 0000000..d94a373 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/segmentors/encoder_decoder.py @@ -0,0 +1,290 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +import torch.nn.functional as F + +from mmseg.core import add_prefix +from mmseg.ops import resize +from .. import builder +from ..builder import SEGMENTORS +from .base import BaseSegmentor + + +@SEGMENTORS.register_module() +class EncoderDecoder(BaseSegmentor): + """Encoder Decoder segmentors. + + EncoderDecoder typically consists of backbone, decode_head, auxiliary_head. + Note that auxiliary_head is only used for deep supervision during training, + which could be dumped during inference. 
+ """ + + def __init__(self, + backbone, + decode_head, + neck=None, + auxiliary_head=None, + train_cfg=None, + test_cfg=None, + pretrained=None, + init_cfg=None): + super(EncoderDecoder, self).__init__(init_cfg) + if pretrained is not None: + assert backbone.get('pretrained') is None, \ + 'both backbone and segmentor set pretrained weight' + backbone.pretrained = pretrained + self.backbone = builder.build_backbone(backbone) + if neck is not None: + self.neck = builder.build_neck(neck) + self._init_decode_head(decode_head) + self._init_auxiliary_head(auxiliary_head) + + self.train_cfg = train_cfg + self.test_cfg = test_cfg + + assert self.with_decode_head + + def _init_decode_head(self, decode_head): + """Initialize ``decode_head``""" + self.decode_head = builder.build_head(decode_head) + self.align_corners = self.decode_head.align_corners + self.num_classes = self.decode_head.num_classes + + def _init_auxiliary_head(self, auxiliary_head): + """Initialize ``auxiliary_head``""" + if auxiliary_head is not None: + if isinstance(auxiliary_head, list): + self.auxiliary_head = nn.ModuleList() + for head_cfg in auxiliary_head: + self.auxiliary_head.append(builder.build_head(head_cfg)) + else: + self.auxiliary_head = builder.build_head(auxiliary_head) + + def extract_feat(self, img): + """Extract features from images.""" + x = self.backbone(img) + if self.with_neck: + x = self.neck(x) + return x + + def encode_decode(self, img, img_metas): + """Encode images with backbone and decode into a semantic segmentation + map of the same size as input.""" + x = self.extract_feat(img) + out = self._decode_head_forward_test(x, img_metas) + out = resize( + input=out, + size=img.shape[2:], + mode='bilinear', + align_corners=self.align_corners) + return out + + def _decode_head_forward_train(self, x, img_metas, gt_semantic_seg): + """Run forward function and calculate loss for decode head in + training.""" + losses = dict() + loss_decode = self.decode_head.forward_train(x, img_metas, + gt_semantic_seg, + self.train_cfg) + + losses.update(add_prefix(loss_decode, 'decode')) + return losses + + def _decode_head_forward_test(self, x, img_metas): + """Run forward function and calculate loss for decode head in + inference.""" + seg_logits = self.decode_head.forward_test(x, img_metas, self.test_cfg) + return seg_logits + + def _auxiliary_head_forward_train(self, x, img_metas, gt_semantic_seg): + """Run forward function and calculate loss for auxiliary head in + training.""" + losses = dict() + if isinstance(self.auxiliary_head, nn.ModuleList): + for idx, aux_head in enumerate(self.auxiliary_head): + loss_aux = aux_head.forward_train(x, img_metas, + gt_semantic_seg, + self.train_cfg) + losses.update(add_prefix(loss_aux, f'aux_{idx}')) + else: + loss_aux = self.auxiliary_head.forward_train( + x, img_metas, gt_semantic_seg, self.train_cfg) + losses.update(add_prefix(loss_aux, 'aux')) + + return losses + + def forward_dummy(self, img): + """Dummy forward function.""" + seg_logit = self.encode_decode(img, None) + + return seg_logit + + def forward_train(self, img, img_metas, gt_semantic_seg): + """Forward function for training. + + Args: + img (Tensor): Input images. + img_metas (list[dict]): List of image info dict where each dict + has: 'img_shape', 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. 
+ gt_semantic_seg (Tensor): Semantic segmentation masks + used if the architecture supports semantic segmentation task. + + Returns: + dict[str, Tensor]: a dictionary of loss components + """ + + x = self.extract_feat(img) + + losses = dict() + + loss_decode = self._decode_head_forward_train(x, img_metas, + gt_semantic_seg) + losses.update(loss_decode) + + if self.with_auxiliary_head: + loss_aux = self._auxiliary_head_forward_train( + x, img_metas, gt_semantic_seg) + losses.update(loss_aux) + + return losses + + # TODO refactor + def slide_inference(self, img, img_meta, rescale): + """Inference by sliding-window with overlap. + + If h_crop > h_img or w_crop > w_img, the small patch will be used to + decode without padding. + """ + + h_stride, w_stride = self.test_cfg.stride + h_crop, w_crop = self.test_cfg.crop_size + batch_size, _, h_img, w_img = img.size() + num_classes = self.num_classes + h_grids = max(h_img - h_crop + h_stride - 1, 0) // h_stride + 1 + w_grids = max(w_img - w_crop + w_stride - 1, 0) // w_stride + 1 + preds = img.new_zeros((batch_size, num_classes, h_img, w_img)) + count_mat = img.new_zeros((batch_size, 1, h_img, w_img)) + for h_idx in range(h_grids): + for w_idx in range(w_grids): + y1 = h_idx * h_stride + x1 = w_idx * w_stride + y2 = min(y1 + h_crop, h_img) + x2 = min(x1 + w_crop, w_img) + y1 = max(y2 - h_crop, 0) + x1 = max(x2 - w_crop, 0) + crop_img = img[:, :, y1:y2, x1:x2] + crop_seg_logit = self.encode_decode(crop_img, img_meta) + preds += F.pad(crop_seg_logit, + (int(x1), int(preds.shape[3] - x2), int(y1), + int(preds.shape[2] - y2))) + + count_mat[:, :, y1:y2, x1:x2] += 1 + assert (count_mat == 0).sum() == 0 + if torch.onnx.is_in_onnx_export(): + # cast count_mat to constant while exporting to ONNX + count_mat = torch.from_numpy( + count_mat.cpu().detach().numpy()).to(device=img.device) + preds = preds / count_mat + if rescale: + # remove padding area + resize_shape = img_meta[0]['img_shape'][:2] + preds = preds[:, :, :resize_shape[0], :resize_shape[1]] + preds = resize( + preds, + size=img_meta[0]['ori_shape'][:2], + mode='bilinear', + align_corners=self.align_corners, + warning=False) + return preds + + def whole_inference(self, img, img_meta, rescale): + """Inference with full image.""" + + seg_logit = self.encode_decode(img, img_meta) + if rescale: + # support dynamic shape for onnx + if torch.onnx.is_in_onnx_export(): + size = img.shape[2:] + else: + # remove padding area + resize_shape = img_meta[0]['img_shape'][:2] + seg_logit = seg_logit[:, :, :resize_shape[0], :resize_shape[1]] + size = img_meta[0]['ori_shape'][:2] + seg_logit = resize( + seg_logit, + size=size, + mode='bilinear', + align_corners=self.align_corners, + warning=False) + + return seg_logit + + def inference(self, img, img_meta, rescale): + """Inference with slide/whole style. + + Args: + img (Tensor): The input image of shape (N, 3, H, W). + img_meta (dict): Image info dict where each dict has: 'img_shape', + 'scale_factor', 'flip', and may also contain + 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:Collect`. + rescale (bool): Whether rescale back to original shape. + + Returns: + Tensor: The output segmentation map. 
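+
+        Example:
+            Illustrative ``test_cfg`` values (chosen for this sketch, not
+            defaults shipped with this file)::
+
+                test_cfg = dict(mode='whole')
+                test_cfg = dict(mode='slide', crop_size=(512, 512),
+                                stride=(341, 341))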
+ """ + + assert self.test_cfg.mode in ['slide', 'whole'] + ori_shape = img_meta[0]['ori_shape'] + assert all(_['ori_shape'] == ori_shape for _ in img_meta) + if self.test_cfg.mode == 'slide': + seg_logit = self.slide_inference(img, img_meta, rescale) + else: + seg_logit = self.whole_inference(img, img_meta, rescale) + output = F.softmax(seg_logit, dim=1) + flip = img_meta[0]['flip'] + if flip: + flip_direction = img_meta[0]['flip_direction'] + assert flip_direction in ['horizontal', 'vertical'] + if flip_direction == 'horizontal': + output = output.flip(dims=(3, )) + elif flip_direction == 'vertical': + output = output.flip(dims=(2, )) + + return output + + def simple_test(self, img, img_meta, rescale=True): + """Simple test with single image.""" + seg_logit = self.inference(img, img_meta, rescale) + seg_pred = seg_logit.argmax(dim=1) + if torch.onnx.is_in_onnx_export(): + # our inference backend only support 4D output + seg_pred = seg_pred.unsqueeze(0) + return seg_pred + seg_pred = seg_pred.cpu().numpy() + # unravel batch dim + seg_pred = list(seg_pred) + return seg_pred + + def aug_test(self, imgs, img_metas, rescale=True): + """Test with augmentations. + + Only rescale=True is supported. + """ + # aug_test rescale all imgs back to ori_shape for now + assert rescale + # to save memory, we get augmented seg logit inplace + seg_logit = self.inference(imgs[0], img_metas[0], rescale) + for i in range(1, len(imgs)): + cur_seg_logit = self.inference(imgs[i], img_metas[i], rescale) + seg_logit += cur_seg_logit + seg_logit /= len(imgs) + seg_pred = seg_logit.argmax(dim=1) + seg_pred = seg_pred.cpu().numpy() + # unravel batch dim + seg_pred = list(seg_pred) + return seg_pred diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/utils/__init__.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/utils/__init__.py new file mode 100644 index 0000000..6d83290 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/utils/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .embed import PatchEmbed +from .inverted_residual import InvertedResidual, InvertedResidualV3 +from .make_divisible import make_divisible +from .res_layer import ResLayer +from .se_layer import SELayer +from .self_attention_block import SelfAttentionBlock +from .shape_convert import (nchw2nlc2nchw, nchw_to_nlc, nlc2nchw2nlc, + nlc_to_nchw) +from .up_conv_block import UpConvBlock + +__all__ = [ + 'ResLayer', 'SelfAttentionBlock', 'make_divisible', 'InvertedResidual', + 'UpConvBlock', 'InvertedResidualV3', 'SELayer', 'PatchEmbed', + 'nchw_to_nlc', 'nlc_to_nchw', 'nchw2nlc2nchw', 'nlc2nchw2nlc' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/utils/embed.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/utils/embed.py new file mode 100644 index 0000000..1515675 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/utils/embed.py @@ -0,0 +1,330 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import math +from typing import Sequence + +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import build_conv_layer, build_norm_layer +from mmcv.runner.base_module import BaseModule +from mmcv.utils import to_2tuple + + +class AdaptivePadding(nn.Module): + """Applies padding to input (if needed) so that input can get fully covered + by filter you specified. It support two modes "same" and "corner". 
The + "same" mode is same with "SAME" padding mode in TensorFlow, pad zero around + input. The "corner" mode would pad zero to bottom right. + + Args: + kernel_size (int | tuple): Size of the kernel: + stride (int | tuple): Stride of the filter. Default: 1: + dilation (int | tuple): Spacing between kernel elements. + Default: 1. + padding (str): Support "same" and "corner", "corner" mode + would pad zero to bottom right, and "same" mode would + pad zero around input. Default: "corner". + Example: + >>> kernel_size = 16 + >>> stride = 16 + >>> dilation = 1 + >>> input = torch.rand(1, 1, 15, 17) + >>> adap_pad = AdaptivePadding( + >>> kernel_size=kernel_size, + >>> stride=stride, + >>> dilation=dilation, + >>> padding="corner") + >>> out = adap_pad(input) + >>> assert (out.shape[2], out.shape[3]) == (16, 32) + >>> input = torch.rand(1, 1, 16, 17) + >>> out = adap_pad(input) + >>> assert (out.shape[2], out.shape[3]) == (16, 32) + """ + + def __init__(self, kernel_size=1, stride=1, dilation=1, padding='corner'): + + super(AdaptivePadding, self).__init__() + + assert padding in ('same', 'corner') + + kernel_size = to_2tuple(kernel_size) + stride = to_2tuple(stride) + dilation = to_2tuple(dilation) + + self.padding = padding + self.kernel_size = kernel_size + self.stride = stride + self.dilation = dilation + + def get_pad_shape(self, input_shape): + input_h, input_w = input_shape + kernel_h, kernel_w = self.kernel_size + stride_h, stride_w = self.stride + output_h = math.ceil(input_h / stride_h) + output_w = math.ceil(input_w / stride_w) + pad_h = max((output_h - 1) * stride_h + + (kernel_h - 1) * self.dilation[0] + 1 - input_h, 0) + pad_w = max((output_w - 1) * stride_w + + (kernel_w - 1) * self.dilation[1] + 1 - input_w, 0) + return pad_h, pad_w + + def forward(self, x): + pad_h, pad_w = self.get_pad_shape(x.size()[-2:]) + if pad_h > 0 or pad_w > 0: + if self.padding == 'corner': + x = F.pad(x, [0, pad_w, 0, pad_h]) + elif self.padding == 'same': + x = F.pad(x, [ + pad_w // 2, pad_w - pad_w // 2, pad_h // 2, + pad_h - pad_h // 2 + ]) + return x + + +class PatchEmbed(BaseModule): + """Image to Patch Embedding. + + We use a conv layer to implement PatchEmbed. + + Args: + in_channels (int): The num of input channels. Default: 3 + embed_dims (int): The dimensions of embedding. Default: 768 + conv_type (str): The config dict for embedding + conv layer type selection. Default: "Conv2d". + kernel_size (int): The kernel_size of embedding conv. Default: 16. + stride (int, optional): The slide stride of embedding conv. + Default: None (Would be set as `kernel_size`). + padding (int | tuple | string ): The padding length of + embedding conv. When it is a string, it means the mode + of adaptive padding, support "same" and "corner" now. + Default: "corner". + dilation (int): The dilation rate of embedding conv. Default: 1. + bias (bool): Bias of embed conv. Default: True. + norm_cfg (dict, optional): Config dict for normalization layer. + Default: None. + input_size (int | tuple | None): The size of input, which will be + used to calculate the out size. Only work when `dynamic_size` + is False. Default: None. + init_cfg (`mmcv.ConfigDict`, optional): The Config for initialization. + Default: None. 
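+
+    Example:
+        Illustrative usage (not from the upstream file); the 224x224 input
+        and 16x16 patching are arbitrary, giving 14 x 14 = 196 tokens.
+
+        >>> import torch
+        >>> patch_embed = PatchEmbed(in_channels=3, embed_dims=768,
+        ...                          kernel_size=16)
+        >>> tokens, out_size = patch_embed(torch.rand(1, 3, 224, 224))
+        >>> assert tokens.shape == (1, 196, 768) and out_size == (14, 14)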
+ """ + + def __init__(self, + in_channels=3, + embed_dims=768, + conv_type='Conv2d', + kernel_size=16, + stride=None, + padding='corner', + dilation=1, + bias=True, + norm_cfg=None, + input_size=None, + init_cfg=None): + super(PatchEmbed, self).__init__(init_cfg=init_cfg) + + self.embed_dims = embed_dims + if stride is None: + stride = kernel_size + + kernel_size = to_2tuple(kernel_size) + stride = to_2tuple(stride) + dilation = to_2tuple(dilation) + + if isinstance(padding, str): + self.adap_padding = AdaptivePadding( + kernel_size=kernel_size, + stride=stride, + dilation=dilation, + padding=padding) + # disable the padding of conv + padding = 0 + else: + self.adap_padding = None + padding = to_2tuple(padding) + + self.projection = build_conv_layer( + dict(type=conv_type), + in_channels=in_channels, + out_channels=embed_dims, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + bias=bias) + + if norm_cfg is not None: + self.norm = build_norm_layer(norm_cfg, embed_dims)[1] + else: + self.norm = None + + if input_size: + input_size = to_2tuple(input_size) + # `init_out_size` would be used outside to + # calculate the num_patches + # when `use_abs_pos_embed` outside + self.init_input_size = input_size + if self.adap_padding: + pad_h, pad_w = self.adap_padding.get_pad_shape(input_size) + input_h, input_w = input_size + input_h = input_h + pad_h + input_w = input_w + pad_w + input_size = (input_h, input_w) + + # https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html + h_out = (input_size[0] + 2 * padding[0] - dilation[0] * + (kernel_size[0] - 1) - 1) // stride[0] + 1 + w_out = (input_size[1] + 2 * padding[1] - dilation[1] * + (kernel_size[1] - 1) - 1) // stride[1] + 1 + self.init_out_size = (h_out, w_out) + else: + self.init_input_size = None + self.init_out_size = None + + def forward(self, x): + """ + Args: + x (Tensor): Has shape (B, C, H, W). In most case, C is 3. + + Returns: + tuple: Contains merged results and its spatial shape. + + - x (Tensor): Has shape (B, out_h * out_w, embed_dims) + - out_size (tuple[int]): Spatial shape of x, arrange as + (out_h, out_w). + """ + + if self.adap_padding: + x = self.adap_padding(x) + + x = self.projection(x) + out_size = (x.shape[2], x.shape[3]) + x = x.flatten(2).transpose(1, 2) + if self.norm is not None: + x = self.norm(x) + return x, out_size + + +class PatchMerging(BaseModule): + """Merge patch feature map. + + This layer groups feature map by kernel_size, and applies norm and linear + layers to the grouped feature map. Our implementation uses `nn.Unfold` to + merge patch, which is about 25% faster than original implementation. + Instead, we need to modify pretrained models for compatibility. + + Args: + in_channels (int): The num of input channels. + out_channels (int): The num of output channels. + kernel_size (int | tuple, optional): the kernel size in the unfold + layer. Defaults to 2. + stride (int | tuple, optional): the stride of the sliding blocks in the + unfold layer. Default: None. (Would be set as `kernel_size`) + padding (int | tuple | string ): The padding length of + embedding conv. When it is a string, it means the mode + of adaptive padding, support "same" and "corner" now. + Default: "corner". + dilation (int | tuple, optional): dilation parameter in the unfold + layer. Default: 1. + bias (bool, optional): Whether to add bias in linear layer or not. + Defaults: False. + norm_cfg (dict, optional): Config dict for normalization layer. + Default: dict(type='LN'). 
+ init_cfg (dict, optional): The extra config for initialization. + Default: None. + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size=2, + stride=None, + padding='corner', + dilation=1, + bias=False, + norm_cfg=dict(type='LN'), + init_cfg=None): + super().__init__(init_cfg=init_cfg) + self.in_channels = in_channels + self.out_channels = out_channels + if stride: + stride = stride + else: + stride = kernel_size + + kernel_size = to_2tuple(kernel_size) + stride = to_2tuple(stride) + dilation = to_2tuple(dilation) + + if isinstance(padding, str): + self.adap_padding = AdaptivePadding( + kernel_size=kernel_size, + stride=stride, + dilation=dilation, + padding=padding) + # disable the padding of unfold + padding = 0 + else: + self.adap_padding = None + + padding = to_2tuple(padding) + self.sampler = nn.Unfold( + kernel_size=kernel_size, + dilation=dilation, + padding=padding, + stride=stride) + + sample_dim = kernel_size[0] * kernel_size[1] * in_channels + + if norm_cfg is not None: + self.norm = build_norm_layer(norm_cfg, sample_dim)[1] + else: + self.norm = None + + self.reduction = nn.Linear(sample_dim, out_channels, bias=bias) + + def forward(self, x, input_size): + """ + Args: + x (Tensor): Has shape (B, H*W, C_in). + input_size (tuple[int]): The spatial shape of x, arrange as (H, W). + Default: None. + + Returns: + tuple: Contains merged results and its spatial shape. + + - x (Tensor): Has shape (B, Merged_H * Merged_W, C_out) + - out_size (tuple[int]): Spatial shape of x, arrange as + (Merged_H, Merged_W). + """ + B, L, C = x.shape + assert isinstance(input_size, Sequence), f'Expect ' \ + f'input_size is ' \ + f'`Sequence` ' \ + f'but get {input_size}' + + H, W = input_size + assert L == H * W, 'input feature has wrong size' + + x = x.view(B, H, W, C).permute([0, 3, 1, 2]) # B, C, H, W + # Use nn.Unfold to merge patch. About 25% faster than original method, + # but need to modify pretrained model for compatibility + + if self.adap_padding: + x = self.adap_padding(x) + H, W = x.shape[-2:] + + x = self.sampler(x) + # if kernel_size=2 and stride=2, x should has shape (B, 4*C, H/2*W/2) + + out_h = (H + 2 * self.sampler.padding[0] - self.sampler.dilation[0] * + (self.sampler.kernel_size[0] - 1) - + 1) // self.sampler.stride[0] + 1 + out_w = (W + 2 * self.sampler.padding[1] - self.sampler.dilation[1] * + (self.sampler.kernel_size[1] - 1) - + 1) // self.sampler.stride[1] + 1 + + output_size = (out_h, out_w) + x = x.transpose(1, 2) # B, H/2*W/2, 4*C + x = self.norm(x) if self.norm else x + x = self.reduction(x) + return x, output_size diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/utils/inverted_residual.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/utils/inverted_residual.py new file mode 100644 index 0000000..c9cda76 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/utils/inverted_residual.py @@ -0,0 +1,213 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from mmcv.cnn import ConvModule +from torch import nn +from torch.utils import checkpoint as cp + +from .se_layer import SELayer + + +class InvertedResidual(nn.Module): + """InvertedResidual block for MobileNetV2. + + Args: + in_channels (int): The input channels of the InvertedResidual block. + out_channels (int): The output channels of the InvertedResidual block. + stride (int): Stride of the middle (first) 3x3 convolution. + expand_ratio (int): Adjusts number of channels of the hidden layer + in InvertedResidual by this amount. 
+ dilation (int): Dilation rate of depthwise conv. Default: 1 + conv_cfg (dict): Config dict for convolution layer. + Default: None, which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU6'). + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + + Returns: + Tensor: The output tensor. + """ + + def __init__(self, + in_channels, + out_channels, + stride, + expand_ratio, + dilation=1, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU6'), + with_cp=False, + **kwargs): + super(InvertedResidual, self).__init__() + self.stride = stride + assert stride in [1, 2], f'stride must in [1, 2]. ' \ + f'But received {stride}.' + self.with_cp = with_cp + self.use_res_connect = self.stride == 1 and in_channels == out_channels + hidden_dim = int(round(in_channels * expand_ratio)) + + layers = [] + if expand_ratio != 1: + layers.append( + ConvModule( + in_channels=in_channels, + out_channels=hidden_dim, + kernel_size=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **kwargs)) + layers.extend([ + ConvModule( + in_channels=hidden_dim, + out_channels=hidden_dim, + kernel_size=3, + stride=stride, + padding=dilation, + dilation=dilation, + groups=hidden_dim, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + **kwargs), + ConvModule( + in_channels=hidden_dim, + out_channels=out_channels, + kernel_size=1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None, + **kwargs) + ]) + self.conv = nn.Sequential(*layers) + + def forward(self, x): + + def _inner_forward(x): + if self.use_res_connect: + return x + self.conv(x) + else: + return self.conv(x) + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + return out + + +class InvertedResidualV3(nn.Module): + """Inverted Residual Block for MobileNetV3. + + Args: + in_channels (int): The input channels of this Module. + out_channels (int): The output channels of this Module. + mid_channels (int): The input channels of the depthwise convolution. + kernel_size (int): The kernel size of the depthwise convolution. + Default: 3. + stride (int): The stride of the depthwise convolution. Default: 1. + se_cfg (dict): Config dict for se layer. Default: None, which means no + se layer. + with_expand_conv (bool): Use expand conv or not. If set False, + mid_channels must be the same with in_channels. Default: True. + conv_cfg (dict): Config dict for convolution layer. Default: None, + which means using conv2d. + norm_cfg (dict): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict): Config dict for activation layer. + Default: dict(type='ReLU'). + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + + Returns: + Tensor: The output tensor. 
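+
+    Example:
+        Illustrative usage (not from the upstream file) with arbitrary
+        channel sizes; the residual shortcut applies because stride == 1
+        and in_channels == out_channels.
+
+        >>> import torch
+        >>> block = InvertedResidualV3(
+        ...     in_channels=16, out_channels=16, mid_channels=64)
+        >>> out = block(torch.rand(2, 16, 32, 32))
+        >>> assert out.shape == (2, 16, 32, 32)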
+ """ + + def __init__(self, + in_channels, + out_channels, + mid_channels, + kernel_size=3, + stride=1, + se_cfg=None, + with_expand_conv=True, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + with_cp=False): + super(InvertedResidualV3, self).__init__() + self.with_res_shortcut = (stride == 1 and in_channels == out_channels) + assert stride in [1, 2] + self.with_cp = with_cp + self.with_se = se_cfg is not None + self.with_expand_conv = with_expand_conv + + if self.with_se: + assert isinstance(se_cfg, dict) + if not self.with_expand_conv: + assert mid_channels == in_channels + + if self.with_expand_conv: + self.expand_conv = ConvModule( + in_channels=in_channels, + out_channels=mid_channels, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.depthwise_conv = ConvModule( + in_channels=mid_channels, + out_channels=mid_channels, + kernel_size=kernel_size, + stride=stride, + padding=kernel_size // 2, + groups=mid_channels, + conv_cfg=dict( + type='Conv2dAdaptivePadding') if stride == 2 else conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + if self.with_se: + self.se = SELayer(**se_cfg) + + self.linear_conv = ConvModule( + in_channels=mid_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=None) + + def forward(self, x): + + def _inner_forward(x): + out = x + + if self.with_expand_conv: + out = self.expand_conv(out) + + out = self.depthwise_conv(out) + + if self.with_se: + out = self.se(out) + + out = self.linear_conv(out) + + if self.with_res_shortcut: + return x + out + else: + return out + + if self.with_cp and x.requires_grad: + out = cp.checkpoint(_inner_forward, x) + else: + out = _inner_forward(x) + + return out diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/utils/make_divisible.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/utils/make_divisible.py new file mode 100644 index 0000000..ed42c2e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/utils/make_divisible.py @@ -0,0 +1,28 @@ +# Copyright (c) OpenMMLab. All rights reserved. +def make_divisible(value, divisor, min_value=None, min_ratio=0.9): + """Make divisible function. + + This function rounds the channel number to the nearest value that can be + divisible by the divisor. It is taken from the original tf repo. It ensures + that all layers have a channel number that is divisible by divisor. It can + be seen here: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py # noqa + + Args: + value (int): The original channel number. + divisor (int): The divisor to fully divide the channel number. + min_value (int): The minimum value of the output channel. + Default: None, means that the minimum value equal to the divisor. + min_ratio (float): The minimum ratio of the rounded channel number to + the original channel number. Default: 0.9. + + Returns: + int: The modified output channel number. + """ + + if min_value is None: + min_value = divisor + new_value = max(min_value, int(value + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than (1-min_ratio). 
+ if new_value < min_ratio * value: + new_value += divisor + return new_value diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/utils/res_layer.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/utils/res_layer.py new file mode 100644 index 0000000..190a0c5 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/utils/res_layer.py @@ -0,0 +1,96 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from mmcv.cnn import build_conv_layer, build_norm_layer +from mmcv.runner import Sequential +from torch import nn as nn + + +class ResLayer(Sequential): + """ResLayer to build ResNet style backbone. + + Args: + block (nn.Module): block used to build ResLayer. + inplanes (int): inplanes of block. + planes (int): planes of block. + num_blocks (int): number of blocks. + stride (int): stride of the first block. Default: 1 + avg_down (bool): Use AvgPool instead of stride conv when + downsampling in the bottleneck. Default: False + conv_cfg (dict): dictionary to construct and config conv layer. + Default: None + norm_cfg (dict): dictionary to construct and config norm layer. + Default: dict(type='BN') + multi_grid (int | None): Multi grid dilation rates of last + stage. Default: None + contract_dilation (bool): Whether contract first dilation of each layer + Default: False + """ + + def __init__(self, + block, + inplanes, + planes, + num_blocks, + stride=1, + dilation=1, + avg_down=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + multi_grid=None, + contract_dilation=False, + **kwargs): + self.block = block + + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = [] + conv_stride = stride + if avg_down: + conv_stride = 1 + downsample.append( + nn.AvgPool2d( + kernel_size=stride, + stride=stride, + ceil_mode=True, + count_include_pad=False)) + downsample.extend([ + build_conv_layer( + conv_cfg, + inplanes, + planes * block.expansion, + kernel_size=1, + stride=conv_stride, + bias=False), + build_norm_layer(norm_cfg, planes * block.expansion)[1] + ]) + downsample = nn.Sequential(*downsample) + + layers = [] + if multi_grid is None: + if dilation > 1 and contract_dilation: + first_dilation = dilation // 2 + else: + first_dilation = dilation + else: + first_dilation = multi_grid[0] + layers.append( + block( + inplanes=inplanes, + planes=planes, + stride=stride, + dilation=first_dilation, + downsample=downsample, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + **kwargs)) + inplanes = planes * block.expansion + for i in range(1, num_blocks): + layers.append( + block( + inplanes=inplanes, + planes=planes, + stride=1, + dilation=dilation if multi_grid is None else multi_grid[i], + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + **kwargs)) + super(ResLayer, self).__init__(*layers) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/utils/se_layer.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/utils/se_layer.py new file mode 100644 index 0000000..16f52aa --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/utils/se_layer.py @@ -0,0 +1,58 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import mmcv +import torch.nn as nn +from mmcv.cnn import ConvModule + +from .make_divisible import make_divisible + + +class SELayer(nn.Module): + """Squeeze-and-Excitation Module. + + Args: + channels (int): The input (and output) channels of the SE layer. + ratio (int): Squeeze ratio in SELayer, the intermediate channel will be + ``int(channels/ratio)``. Default: 16. 
+        conv_cfg (None or dict): Config dict for convolution layer.
+            Default: None, which means using conv2d.
+        act_cfg (dict or Sequence[dict]): Config dict for activation layer.
+            If act_cfg is a dict, two activation layers will be configured
+            by this dict. If act_cfg is a sequence of dicts, the first
+            activation layer will be configured by the first dict and the
+            second activation layer will be configured by the second dict.
+            Default: (dict(type='ReLU'), dict(type='HSigmoid', bias=3.0,
+            divisor=6.0)).
+    """
+
+    def __init__(self,
+                 channels,
+                 ratio=16,
+                 conv_cfg=None,
+                 act_cfg=(dict(type='ReLU'),
+                          dict(type='HSigmoid', bias=3.0, divisor=6.0))):
+        super(SELayer, self).__init__()
+        if isinstance(act_cfg, dict):
+            act_cfg = (act_cfg, act_cfg)
+        assert len(act_cfg) == 2
+        assert mmcv.is_tuple_of(act_cfg, dict)
+        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+        self.conv1 = ConvModule(
+            in_channels=channels,
+            out_channels=make_divisible(channels // ratio, 8),
+            kernel_size=1,
+            stride=1,
+            conv_cfg=conv_cfg,
+            act_cfg=act_cfg[0])
+        self.conv2 = ConvModule(
+            in_channels=make_divisible(channels // ratio, 8),
+            out_channels=channels,
+            kernel_size=1,
+            stride=1,
+            conv_cfg=conv_cfg,
+            act_cfg=act_cfg[1])
+
+    def forward(self, x):
+        out = self.global_avgpool(x)
+        out = self.conv1(out)
+        out = self.conv2(out)
+        return x * out
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/utils/self_attention_block.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/utils/self_attention_block.py
new file mode 100644
index 0000000..c945fa7
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/utils/self_attention_block.py
@@ -0,0 +1,160 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+from mmcv.cnn import ConvModule, constant_init
+from torch import nn as nn
+from torch.nn import functional as F
+
+
+class SelfAttentionBlock(nn.Module):
+    """General self-attention block/non-local block.
+
+    Please refer to https://arxiv.org/abs/1706.03762 for details about key,
+    query and value.
+
+    Args:
+        key_in_channels (int): Input channels of key feature.
+        query_in_channels (int): Input channels of query feature.
+        channels (int): Output channels of key/query transform.
+        out_channels (int): Output channels.
+        share_key_query (bool): Whether share projection weight between key
+            and query projection.
+        query_downsample (nn.Module): Query downsample module.
+        key_downsample (nn.Module): Key downsample module.
+        key_query_num_convs (int): Number of convs for key/query projection.
+        value_out_num_convs (int): Number of convs for value/out projection.
+        key_query_norm (bool): Whether to use norm in the key/query
+            projection convs.
+        value_out_norm (bool): Whether to use norm in the value/out
+            projection convs.
+        matmul_norm (bool): Whether to normalize the attention map by the
+            square root of channels.
+        with_out (bool): Whether use out projection.
+        conv_cfg (dict|None): Config of conv layers.
+        norm_cfg (dict|None): Config of norm layers.
+        act_cfg (dict|None): Config of activation layers.
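+
+    Example:
+        Illustrative construction (not from the upstream file) with
+        arbitrary sizes; ``init_weights`` zero-initializes the plain-conv
+        out projection, so a freshly built block returns zeros of the
+        expected shape.
+
+        >>> import torch
+        >>> block = SelfAttentionBlock(
+        ...     key_in_channels=64, query_in_channels=64, channels=16,
+        ...     out_channels=64, share_key_query=False,
+        ...     query_downsample=None, key_downsample=None,
+        ...     key_query_num_convs=2, value_out_num_convs=1,
+        ...     key_query_norm=False, value_out_norm=False,
+        ...     matmul_norm=True, with_out=True, conv_cfg=None,
+        ...     norm_cfg=None, act_cfg=None)
+        >>> context = block(torch.rand(2, 64, 8, 8), torch.rand(2, 64, 8, 8))
+        >>> assert context.shape == (2, 64, 8, 8)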
+ """ + + def __init__(self, key_in_channels, query_in_channels, channels, + out_channels, share_key_query, query_downsample, + key_downsample, key_query_num_convs, value_out_num_convs, + key_query_norm, value_out_norm, matmul_norm, with_out, + conv_cfg, norm_cfg, act_cfg): + super(SelfAttentionBlock, self).__init__() + if share_key_query: + assert key_in_channels == query_in_channels + self.key_in_channels = key_in_channels + self.query_in_channels = query_in_channels + self.out_channels = out_channels + self.channels = channels + self.share_key_query = share_key_query + self.conv_cfg = conv_cfg + self.norm_cfg = norm_cfg + self.act_cfg = act_cfg + self.key_project = self.build_project( + key_in_channels, + channels, + num_convs=key_query_num_convs, + use_conv_module=key_query_norm, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + if share_key_query: + self.query_project = self.key_project + else: + self.query_project = self.build_project( + query_in_channels, + channels, + num_convs=key_query_num_convs, + use_conv_module=key_query_norm, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + self.value_project = self.build_project( + key_in_channels, + channels if with_out else out_channels, + num_convs=value_out_num_convs, + use_conv_module=value_out_norm, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + if with_out: + self.out_project = self.build_project( + channels, + out_channels, + num_convs=value_out_num_convs, + use_conv_module=value_out_norm, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + else: + self.out_project = None + + self.query_downsample = query_downsample + self.key_downsample = key_downsample + self.matmul_norm = matmul_norm + + self.init_weights() + + def init_weights(self): + """Initialize weight of later layer.""" + if self.out_project is not None: + if not isinstance(self.out_project, ConvModule): + constant_init(self.out_project, 0) + + def build_project(self, in_channels, channels, num_convs, use_conv_module, + conv_cfg, norm_cfg, act_cfg): + """Build projection layer for key/query/value/out.""" + if use_conv_module: + convs = [ + ConvModule( + in_channels, + channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + ] + for _ in range(num_convs - 1): + convs.append( + ConvModule( + channels, + channels, + 1, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + else: + convs = [nn.Conv2d(in_channels, channels, 1)] + for _ in range(num_convs - 1): + convs.append(nn.Conv2d(channels, channels, 1)) + if len(convs) > 1: + convs = nn.Sequential(*convs) + else: + convs = convs[0] + return convs + + def forward(self, query_feats, key_feats): + """Forward function.""" + batch_size = query_feats.size(0) + query = self.query_project(query_feats) + if self.query_downsample is not None: + query = self.query_downsample(query) + query = query.reshape(*query.shape[:2], -1) + query = query.permute(0, 2, 1).contiguous() + + key = self.key_project(key_feats) + value = self.value_project(key_feats) + if self.key_downsample is not None: + key = self.key_downsample(key) + value = self.key_downsample(value) + key = key.reshape(*key.shape[:2], -1) + value = value.reshape(*value.shape[:2], -1) + value = value.permute(0, 2, 1).contiguous() + + sim_map = torch.matmul(query, key) + if self.matmul_norm: + sim_map = (self.channels**-.5) * sim_map + sim_map = F.softmax(sim_map, dim=-1) + + context = torch.matmul(sim_map, value) + context = context.permute(0, 2, 1).contiguous() + context = 
context.reshape(batch_size, -1, *query_feats.shape[2:])
+        if self.out_project is not None:
+            context = self.out_project(context)
+        return context
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/utils/shape_convert.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/utils/shape_convert.py
new file mode 100644
index 0000000..cce1e22
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/utils/shape_convert.py
@@ -0,0 +1,107 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+def nlc_to_nchw(x, hw_shape):
+    """Convert [N, L, C] shape tensor to [N, C, H, W] shape tensor.
+
+    Args:
+        x (Tensor): The input tensor of shape [N, L, C] before conversion.
+        hw_shape (Sequence[int]): The height and width of output feature map.
+
+    Returns:
+        Tensor: The output tensor of shape [N, C, H, W] after conversion.
+    """
+    H, W = hw_shape
+    assert len(x.shape) == 3
+    B, L, C = x.shape
+    assert L == H * W, 'The seq_len doesn\'t match H, W'
+    return x.transpose(1, 2).reshape(B, C, H, W)
+
+
+def nchw_to_nlc(x):
+    """Flatten [N, C, H, W] shape tensor to [N, L, C] shape tensor.
+
+    Args:
+        x (Tensor): The input tensor of shape [N, C, H, W] before conversion.
+
+    Returns:
+        Tensor: The output tensor of shape [N, L, C] after conversion.
+    """
+    assert len(x.shape) == 4
+    return x.flatten(2).transpose(1, 2).contiguous()
+
+
+def nchw2nlc2nchw(module, x, contiguous=False, **kwargs):
+    """Flatten a [N, C, H, W] tensor `x` to [N, L, C], feed it to `module`,
+    and convert the [N, L, C] output of `module` back to [N, C, H, W].
+
+    Args:
+        module (Callable): A callable object that takes a tensor
+            with shape [N, L, C] as input.
+        x (Tensor): The input tensor of shape [N, C, H, W].
+        contiguous (Bool): Whether to make the tensor contiguous
+            after each shape transform.
+
+    Returns:
+        Tensor: The output tensor of shape [N, C, H, W].
+
+    Example:
+        >>> import torch
+        >>> import torch.nn as nn
+        >>> norm = nn.LayerNorm(4)
+        >>> feature_map = torch.rand(4, 4, 5, 5)
+        >>> output = nchw2nlc2nchw(norm, feature_map)
+    """
+    B, C, H, W = x.shape
+    if not contiguous:
+        x = x.flatten(2).transpose(1, 2)
+        x = module(x, **kwargs)
+        x = x.transpose(1, 2).reshape(B, C, H, W)
+    else:
+        x = x.flatten(2).transpose(1, 2).contiguous()
+        x = module(x, **kwargs)
+        x = x.transpose(1, 2).reshape(B, C, H, W).contiguous()
+    return x
+
+
+def nlc2nchw2nlc(module, x, hw_shape, contiguous=False, **kwargs):
+    """Convert a [N, L, C] tensor `x` to [N, C, H, W], feed it to `module`,
+    and flatten the [N, C, H, W] output of `module` back to [N, L, C].
+
+    Args:
+        module (Callable): A callable object that takes a tensor
+            with shape [N, C, H, W] as input.
+        x (Tensor): The input tensor of shape [N, L, C].
+        hw_shape (Sequence[int]): The height and width of the
+            feature map with shape [N, C, H, W].
+        contiguous (Bool): Whether to make the tensor contiguous
+            after each shape transform.
+
+    Returns:
+        Tensor: The output tensor of shape [N, L, C].
+ + Example: + >>> import torch + >>> import torch.nn as nn + >>> conv = nn.Conv2d(16, 16, 3, 1, 1) + >>> feature_map = torch.rand(4, 25, 16) + >>> output = nlc2nchw2nlc(conv, feature_map, (5, 5)) + """ + H, W = hw_shape + assert len(x.shape) == 3 + B, L, C = x.shape + assert L == H * W, 'The seq_len doesn\'t match H, W' + if not contiguous: + x = x.transpose(1, 2).reshape(B, C, H, W) + x = module(x, **kwargs) + x = x.flatten(2).transpose(1, 2) + else: + x = x.transpose(1, 2).reshape(B, C, H, W).contiguous() + x = module(x, **kwargs) + x = x.flatten(2).transpose(1, 2).contiguous() + return x diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/utils/up_conv_block.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/utils/up_conv_block.py new file mode 100644 index 0000000..d8396d9 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/models/utils/up_conv_block.py @@ -0,0 +1,102 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule, build_upsample_layer + + +class UpConvBlock(nn.Module): + """Upsample convolution block in decoder for UNet. + + This upsample convolution block consists of one upsample module + followed by one convolution block. The upsample module expands the + high-level low-resolution feature map and the convolution block fuses + the upsampled high-level low-resolution feature map and the low-level + high-resolution feature map from encoder. + + Args: + conv_block (nn.Sequential): Sequential of convolutional layers. + in_channels (int): Number of input channels of the high-level + skip_channels (int): Number of input channels of the low-level + high-resolution feature map from encoder. + out_channels (int): Number of output channels. + num_convs (int): Number of convolutional layers in the conv_block. + Default: 2. + stride (int): Stride of convolutional layer in conv_block. Default: 1. + dilation (int): Dilation rate of convolutional layer in conv_block. + Default: 1. + with_cp (bool): Use checkpoint or not. Using checkpoint will save some + memory while slowing down the training speed. Default: False. + conv_cfg (dict | None): Config dict for convolution layer. + Default: None. + norm_cfg (dict | None): Config dict for normalization layer. + Default: dict(type='BN'). + act_cfg (dict | None): Config dict for activation layer in ConvModule. + Default: dict(type='ReLU'). + upsample_cfg (dict): The upsample config of the upsample module in + decoder. Default: dict(type='InterpConv'). If the size of + high-level feature map is the same as that of skip feature map + (low-level feature map from encoder), it does not need upsample the + high-level feature map and the upsample_cfg is None. + dcn (bool): Use deformable convolution in convolutional layer or not. + Default: None. + plugins (dict): plugins for convolutional layers. Default: None. + """ + + def __init__(self, + conv_block, + in_channels, + skip_channels, + out_channels, + num_convs=2, + stride=1, + dilation=1, + with_cp=False, + conv_cfg=None, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + dcn=None, + plugins=None): + super(UpConvBlock, self).__init__() + assert dcn is None, 'Not implemented yet.' + assert plugins is None, 'Not implemented yet.' 
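+        # Editor's note (not in the upstream file): forward() concatenates
+        # the skip feature map with the upsampled high-level map, and the
+        # upsample branch below projects in_channels down to skip_channels,
+        # so the conv block sees 2 * skip_channels input channels.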
+ + self.conv_block = conv_block( + in_channels=2 * skip_channels, + out_channels=out_channels, + num_convs=num_convs, + stride=stride, + dilation=dilation, + with_cp=with_cp, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + dcn=None, + plugins=None) + if upsample_cfg is not None: + self.upsample = build_upsample_layer( + cfg=upsample_cfg, + in_channels=in_channels, + out_channels=skip_channels, + with_cp=with_cp, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + else: + self.upsample = ConvModule( + in_channels, + skip_channels, + kernel_size=1, + stride=1, + padding=0, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg) + + def forward(self, skip, x): + """Forward function.""" + + x = self.upsample(x) + out = torch.cat([skip, x], dim=1) + out = self.conv_block(out) + + return out diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/ops/__init__.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/ops/__init__.py new file mode 100644 index 0000000..bc075cd --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/ops/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .encoding import Encoding +from .wrappers import Upsample, resize + +__all__ = ['Upsample', 'resize', 'Encoding'] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/ops/encoding.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/ops/encoding.py new file mode 100644 index 0000000..f397cc5 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/ops/encoding.py @@ -0,0 +1,75 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch import nn +from torch.nn import functional as F + + +class Encoding(nn.Module): + """Encoding Layer: a learnable residual encoder. + + Input is of shape (batch_size, channels, height, width). + Output is of shape (batch_size, num_codes, channels). + + Args: + channels: dimension of the features or feature channels + num_codes: number of code words + """ + + def __init__(self, channels, num_codes): + super(Encoding, self).__init__() + # init codewords and smoothing factor + self.channels, self.num_codes = channels, num_codes + std = 1. 
/ ((num_codes * channels)**0.5) + # [num_codes, channels] + self.codewords = nn.Parameter( + torch.empty(num_codes, channels, + dtype=torch.float).uniform_(-std, std), + requires_grad=True) + # [num_codes] + self.scale = nn.Parameter( + torch.empty(num_codes, dtype=torch.float).uniform_(-1, 0), + requires_grad=True) + + @staticmethod + def scaled_l2(x, codewords, scale): + num_codes, channels = codewords.size() + batch_size = x.size(0) + reshaped_scale = scale.view((1, 1, num_codes)) + expanded_x = x.unsqueeze(2).expand( + (batch_size, x.size(1), num_codes, channels)) + reshaped_codewords = codewords.view((1, 1, num_codes, channels)) + + scaled_l2_norm = reshaped_scale * ( + expanded_x - reshaped_codewords).pow(2).sum(dim=3) + return scaled_l2_norm + + @staticmethod + def aggregate(assignment_weights, x, codewords): + num_codes, channels = codewords.size() + reshaped_codewords = codewords.view((1, 1, num_codes, channels)) + batch_size = x.size(0) + + expanded_x = x.unsqueeze(2).expand( + (batch_size, x.size(1), num_codes, channels)) + encoded_feat = (assignment_weights.unsqueeze(3) * + (expanded_x - reshaped_codewords)).sum(dim=1) + return encoded_feat + + def forward(self, x): + assert x.dim() == 4 and x.size(1) == self.channels + # [batch_size, channels, height, width] + batch_size = x.size(0) + # [batch_size, height x width, channels] + x = x.view(batch_size, self.channels, -1).transpose(1, 2).contiguous() + # assignment_weights: [batch_size, channels, num_codes] + assignment_weights = F.softmax( + self.scaled_l2(x, self.codewords, self.scale), dim=2) + # aggregate + encoded_feat = self.aggregate(assignment_weights, x, self.codewords) + return encoded_feat + + def __repr__(self): + repr_str = self.__class__.__name__ + repr_str += f'(Nx{self.channels}xHxW =>Nx{self.num_codes}' \ + f'x{self.channels})' + return repr_str diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/ops/wrappers.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/ops/wrappers.py new file mode 100644 index 0000000..bcababd --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/ops/wrappers.py @@ -0,0 +1,51 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import warnings + +import torch.nn as nn +import torch.nn.functional as F + + +def resize(input, + size=None, + scale_factor=None, + mode='nearest', + align_corners=None, + warning=True): + if warning: + if size is not None and align_corners: + input_h, input_w = tuple(int(x) for x in input.shape[2:]) + output_h, output_w = tuple(int(x) for x in size) + if output_h > input_h or output_w > input_w: + if ((output_h > 1 and output_w > 1 and input_h > 1 + and input_w > 1) and (output_h - 1) % (input_h - 1) + and (output_w - 1) % (input_w - 1)): + warnings.warn( + f'When align_corners={align_corners}, ' + 'the output would more aligned if ' + f'input size {(input_h, input_w)} is `x+1` and ' + f'out size {(output_h, output_w)} is `nx+1`') + return F.interpolate(input, size, scale_factor, mode, align_corners) + + +class Upsample(nn.Module): + + def __init__(self, + size=None, + scale_factor=None, + mode='nearest', + align_corners=None): + super(Upsample, self).__init__() + self.size = size + if isinstance(scale_factor, tuple): + self.scale_factor = tuple(float(factor) for factor in scale_factor) + else: + self.scale_factor = float(scale_factor) if scale_factor else None + self.mode = mode + self.align_corners = align_corners + + def forward(self, x): + if not self.size: + size = [int(t * self.scale_factor) for t in x.shape[-2:]] + else: + size = self.size + return resize(x, size, None, self.mode, self.align_corners) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/utils/__init__.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/utils/__init__.py new file mode 100644 index 0000000..e3ef4b3 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/utils/__init__.py @@ -0,0 +1,11 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .collect_env import collect_env +from .logger import get_root_logger +from .misc import find_latest_checkpoint +from .set_env import setup_multi_processes +from .util_distribution import build_ddp, build_dp, get_device + +__all__ = [ + 'get_root_logger', 'collect_env', 'find_latest_checkpoint', + 'setup_multi_processes', 'build_ddp', 'build_dp', 'get_device' +] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/utils/collect_env.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/utils/collect_env.py new file mode 100644 index 0000000..3379ecb --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/utils/collect_env.py @@ -0,0 +1,18 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from mmcv.utils import collect_env as collect_base_env +from mmcv.utils import get_git_hash + +import mmseg + + +def collect_env(): + """Collect the information of the running environments.""" + env_info = collect_base_env() + env_info['MMSegmentation'] = f'{mmseg.__version__}+{get_git_hash()[:7]}' + + return env_info + + +if __name__ == '__main__': + for name, val in collect_env().items(): + print('{}: {}'.format(name, val)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/utils/logger.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/utils/logger.py new file mode 100644 index 0000000..0cb3c78 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/utils/logger.py @@ -0,0 +1,28 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import logging + +from mmcv.utils import get_logger + + +def get_root_logger(log_file=None, log_level=logging.INFO): + """Get the root logger. + + The logger will be initialized if it has not been initialized. 
By default a
+    StreamHandler will be added. If `log_file` is specified, a FileHandler
+    will also be added. The name of the root logger is the top-level package
+    name, e.g., "mmseg".
+
+    Args:
+        log_file (str | None): The log filename. If specified, a FileHandler
+            will be added to the root logger.
+        log_level (int): The root logger level. Note that only the process of
+            rank 0 is affected, while other processes will set the level to
+            "Error" and be silent most of the time.
+
+    Returns:
+        logging.Logger: The root logger.
+    """
+
+    logger = get_logger(name='mmseg', log_file=log_file, log_level=log_level)
+
+    return logger
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/utils/misc.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/utils/misc.py
new file mode 100644
index 0000000..bd1b6b1
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/utils/misc.py
@@ -0,0 +1,41 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import glob
+import os.path as osp
+import warnings
+
+
+def find_latest_checkpoint(path, suffix='pth'):
+    """This function is for finding the latest checkpoint.
+
+    It will be used when automatically resuming; modified from
+    https://github.com/open-mmlab/mmdetection/blob/dev-v2.20.0/mmdet/utils/misc.py
+
+    Args:
+        path (str): The path to find checkpoints.
+        suffix (str): File extension for the checkpoint. Defaults to pth.
+
+    Returns:
+        latest_path(str | None): File path of the latest checkpoint.
+    """
+    if not osp.exists(path):
+        warnings.warn("The path of the checkpoints doesn't exist.")
+        return None
+    if osp.exists(osp.join(path, f'latest.{suffix}')):
+        return osp.join(path, f'latest.{suffix}')
+
+    checkpoints = glob.glob(osp.join(path, f'*.{suffix}'))
+    if len(checkpoints) == 0:
+        warnings.warn('There are no checkpoints in the path.')
+        return None
+    latest = -1
+    latest_path = ''
+    for checkpoint in checkpoints:
+        if len(checkpoint) < len(latest_path):
+            continue
+        # `count` is the iteration number: checkpoints are saved as
+        # 'iter_xx.pth' or 'epoch_xx.pth', where xx is the iteration number.
+        count = int(osp.basename(checkpoint).split('_')[-1].split('.')[0])
+        if count > latest:
+            latest = count
+            latest_path = checkpoint
+    return latest_path
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/utils/set_env.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/utils/set_env.py
new file mode 100644
index 0000000..bf18453
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/utils/set_env.py
@@ -0,0 +1,55 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os
+import platform
+
+import cv2
+import torch.multiprocessing as mp
+
+from ..utils import get_root_logger
+
+
+def setup_multi_processes(cfg):
+    """Setup multi-processing environment variables."""
+    logger = get_root_logger()
+
+    # set multi-process start method
+    if platform.system() != 'Windows':
+        mp_start_method = cfg.get('mp_start_method', None)
+        current_method = mp.get_start_method(allow_none=True)
+        if mp_start_method in ('fork', 'spawn', 'forkserver'):
+            logger.info(
+                f'Multi-processing start method `{mp_start_method}` is '
+                f'different from the previous setting `{current_method}`.'
+                f'It will be forcibly set to `{mp_start_method}`.')
+            mp.set_start_method(mp_start_method, force=True)
+        else:
+            logger.info(
+                f'Multi-processing start method is `{mp_start_method}`')
+
+    # disable opencv multithreading to avoid system being overloaded
+    opencv_num_threads = cfg.get('opencv_num_threads', None)
+    if isinstance(opencv_num_threads, int):
+        logger.info(f'OpenCV num_threads is `{opencv_num_threads}`')
+        cv2.setNumThreads(opencv_num_threads)
+    else:
+        logger.info(f'OpenCV num_threads is `{cv2.getNumThreads()}`')
+
+    if cfg.data.workers_per_gpu > 1:
+        # setup OMP threads
+        # This code is adapted from https://github.com/pytorch/pytorch/blob/master/torch/distributed/run.py  # noqa
+        omp_num_threads = cfg.get('omp_num_threads', None)
+        if 'OMP_NUM_THREADS' not in os.environ:
+            if isinstance(omp_num_threads, int):
+                logger.info(f'OMP num threads is {omp_num_threads}')
+                os.environ['OMP_NUM_THREADS'] = str(omp_num_threads)
+        else:
+            logger.info(f'OMP num threads is {os.environ["OMP_NUM_THREADS"]}')
+
+        # setup MKL threads
+        if 'MKL_NUM_THREADS' not in os.environ:
+            mkl_num_threads = cfg.get('mkl_num_threads', None)
+            if isinstance(mkl_num_threads, int):
+                logger.info(f'MKL num threads is {mkl_num_threads}')
+                os.environ['MKL_NUM_THREADS'] = str(mkl_num_threads)
+        else:
+            logger.info(f'MKL num threads is {os.environ["MKL_NUM_THREADS"]}')
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/utils/util_distribution.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/utils/util_distribution.py
new file mode 100644
index 0000000..16651c2
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/utils/util_distribution.py
@@ -0,0 +1,81 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import mmcv
+import torch
+from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
+
+from mmseg import digit_version
+
+dp_factory = {'cuda': MMDataParallel, 'cpu': MMDataParallel}
+
+ddp_factory = {'cuda': MMDistributedDataParallel}
+
+
+def build_dp(model, device='cuda', dim=0, *args, **kwargs):
+    """Build DataParallel module by device type.
+
+    If device is cuda, return a MMDataParallel module; if device is mlu,
+    return a MLUDataParallel module.
+
+    Args:
+        model (:class:`nn.Module`): module to be parallelized.
+        device (str): device type, cuda, cpu or mlu. Defaults to cuda.
+        dim (int): Dimension used to scatter the data. Defaults to 0.
+
+    Returns:
+        :class:`nn.Module`: parallelized module.
+    """
+    if device == 'cuda':
+        model = model.cuda()
+    elif device == 'mlu':
+        assert digit_version(mmcv.__version__) >= digit_version('1.5.0'), \
+            'Please use MMCV >= 1.5.0 for MLU training!'
+        from mmcv.device.mlu import MLUDataParallel
+        dp_factory['mlu'] = MLUDataParallel
+        model = model.mlu()
+
+    return dp_factory[device](model, dim=dim, *args, **kwargs)
+
+
+def build_ddp(model, device='cuda', *args, **kwargs):
+    """Build DistributedDataParallel module by device type.
+
+    If device is cuda, return a MMDistributedDataParallel module;
+    if device is mlu, return a MLUDistributedDataParallel module.
+
+    Args:
+        model (:class:`nn.Module`): module to be parallelized.
+        device (str): device type, mlu or cuda.
+
+    Returns:
+        :class:`nn.Module`: parallelized module.
+
+    References:
+        .. [1] https://pytorch.org/docs/stable/generated/torch.nn.parallel.
+               DistributedDataParallel.html
+    """
+    assert device in ['cuda', 'mlu'], 'Only available for cuda or mlu devices.'
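+    # Editor's note (not in the upstream file): mirrors build_dp above; the
+    # MLU class is imported lazily and registered in ddp_factory so the
+    # module still imports on mmcv builds without MLU support.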
+ if device == 'cuda': + model = model.cuda() + elif device == 'mlu': + assert digit_version(mmcv.__version__) >= digit_version('1.5.0'), \ + 'Please use MMCV >= 1.5.0 for MLU training!' + from mmcv.device.mlu import MLUDistributedDataParallel + ddp_factory['mlu'] = MLUDistributedDataParallel + model = model.mlu() + + return ddp_factory[device](model, *args, **kwargs) + + +def is_mlu_available(): + """Returns a bool indicating if MLU is currently available.""" + return hasattr(torch, 'is_mlu_available') and torch.is_mlu_available() + + +def get_device(): + """Returns an available device, cpu, cuda or mlu.""" + is_device_available = { + 'cuda': torch.cuda.is_available(), + 'mlu': is_mlu_available() + } + device_list = [k for k, v in is_device_available.items() if v] + return device_list[0] if len(device_list) == 1 else 'cpu' diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/version.py b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/version.py new file mode 100644 index 0000000..eda9601 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/mmseg/version.py @@ -0,0 +1,18 @@ +# Copyright (c) Open-MMLab. All rights reserved. + +__version__ = '0.25.0' + + +def parse_version_info(version_str): + version_info = [] + for x in version_str.split('.'): + if x.isdigit(): + version_info.append(int(x)) + elif x.find('rc') != -1: + patch_version = x.split('rc') + version_info.append(int(patch_version[0])) + version_info.append(f'rc{patch_version[1]}') + return tuple(version_info) + + +version_info = parse_version_info(__version__) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/model-index.yml b/prediction/image/mx15hdi/Detect/mmsegmentation/model-index.yml new file mode 100644 index 0000000..2053fd0 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/model-index.yml @@ -0,0 +1,45 @@ +Import: +- configs/ann/ann.yml +- configs/apcnet/apcnet.yml +- configs/beit/beit.yml +- configs/bisenetv1/bisenetv1.yml +- configs/bisenetv2/bisenetv2.yml +- configs/ccnet/ccnet.yml +- configs/cgnet/cgnet.yml +- configs/convnext/convnext.yml +- configs/danet/danet.yml +- configs/deeplabv3/deeplabv3.yml +- configs/deeplabv3plus/deeplabv3plus.yml +- configs/dmnet/dmnet.yml +- configs/dnlnet/dnlnet.yml +- configs/dpt/dpt.yml +- configs/emanet/emanet.yml +- configs/encnet/encnet.yml +- configs/erfnet/erfnet.yml +- configs/fastfcn/fastfcn.yml +- configs/fastscnn/fastscnn.yml +- configs/fcn/fcn.yml +- configs/gcnet/gcnet.yml +- configs/hrnet/hrnet.yml +- configs/icnet/icnet.yml +- configs/isanet/isanet.yml +- configs/knet/knet.yml +- configs/mae/mae.yml +- configs/mobilenet_v2/mobilenet_v2.yml +- configs/mobilenet_v3/mobilenet_v3.yml +- configs/nonlocal_net/nonlocal_net.yml +- configs/ocrnet/ocrnet.yml +- configs/point_rend/point_rend.yml +- configs/psanet/psanet.yml +- configs/pspnet/pspnet.yml +- configs/resnest/resnest.yml +- configs/segformer/segformer.yml +- configs/segmenter/segmenter.yml +- configs/sem_fpn/sem_fpn.yml +- configs/setr/setr.yml +- configs/stdc/stdc.yml +- configs/swin/swin.yml +- configs/twins/twins.yml +- configs/unet/unet.yml +- configs/upernet/upernet.yml +- configs/vit/vit.yml diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/pytest.ini b/prediction/image/mx15hdi/Detect/mmsegmentation/pytest.ini new file mode 100644 index 0000000..9796e87 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/pytest.ini @@ -0,0 +1,7 @@ +[pytest] +addopts = --xdoctest --xdoctest-style=auto +norecursedirs = .git ignore build __pycache__ 
data docker docs .eggs + +filterwarnings= default + ignore:.*No cfgstr given in Cacher constructor or call.*:Warning + ignore:.*Define the __nice__ method for.*:Warning diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/requirements.txt b/prediction/image/mx15hdi/Detect/mmsegmentation/requirements.txt new file mode 100644 index 0000000..6da5ade --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/requirements.txt @@ -0,0 +1,3 @@ +-r requirements/optional.txt +-r requirements/runtime.txt +-r requirements/tests.txt diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/requirements/docs.txt b/prediction/image/mx15hdi/Detect/mmsegmentation/requirements/docs.txt new file mode 100644 index 0000000..2017084 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/requirements/docs.txt @@ -0,0 +1,6 @@ +docutils==0.16.0 +myst-parser +-e git+https://github.com/gaotongxiao/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme +sphinx==4.0.2 +sphinx_copybutton +sphinx_markdown_tables diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/requirements/mminstall.txt b/prediction/image/mx15hdi/Detect/mmsegmentation/requirements/mminstall.txt new file mode 100644 index 0000000..bd43faf --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/requirements/mminstall.txt @@ -0,0 +1,2 @@ +mmcls>=0.20.1 +mmcv-full>=1.4.4,<=1.6.0 diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/requirements/optional.txt b/prediction/image/mx15hdi/Detect/mmsegmentation/requirements/optional.txt new file mode 100644 index 0000000..47fa593 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/requirements/optional.txt @@ -0,0 +1 @@ +cityscapesscripts diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/requirements/readthedocs.txt b/prediction/image/mx15hdi/Detect/mmsegmentation/requirements/readthedocs.txt new file mode 100644 index 0000000..22a894b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/requirements/readthedocs.txt @@ -0,0 +1,4 @@ +mmcv +prettytable +torch +torchvision diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/requirements/runtime.txt b/prediction/image/mx15hdi/Detect/mmsegmentation/requirements/runtime.txt new file mode 100644 index 0000000..520408f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/requirements/runtime.txt @@ -0,0 +1,5 @@ +matplotlib +mmcls>=0.20.1 +numpy +packaging +prettytable diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/requirements/tests.txt b/prediction/image/mx15hdi/Detect/mmsegmentation/requirements/tests.txt new file mode 100644 index 0000000..74fc761 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/requirements/tests.txt @@ -0,0 +1,6 @@ +codecov +flake8 +interrogate +pytest +xdoctest>=0.10.0 +yapf diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/resources/3dogs.jpg b/prediction/image/mx15hdi/Detect/mmsegmentation/resources/3dogs.jpg new file mode 100644 index 0000000..02ef6fc Binary files /dev/null and b/prediction/image/mx15hdi/Detect/mmsegmentation/resources/3dogs.jpg differ diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/resources/3dogs_mask.png b/prediction/image/mx15hdi/Detect/mmsegmentation/resources/3dogs_mask.png new file mode 100644 index 0000000..339c2f5 Binary files /dev/null and b/prediction/image/mx15hdi/Detect/mmsegmentation/resources/3dogs_mask.png differ diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/resources/mmseg-logo.png 
b/prediction/image/mx15hdi/Detect/mmsegmentation/resources/mmseg-logo.png new file mode 100644 index 0000000..009083a Binary files /dev/null and b/prediction/image/mx15hdi/Detect/mmsegmentation/resources/mmseg-logo.png differ diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/setup.cfg b/prediction/image/mx15hdi/Detect/mmsegmentation/setup.cfg new file mode 100644 index 0000000..ec1d341 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/setup.cfg @@ -0,0 +1,22 @@ +[yapf] +based_on_style = pep8 +blank_line_before_nested_class_or_def = true +split_before_expression_after_opening_paren = true + +[isort] +line_length = 79 +multi_line_output = 0 +extra_standard_library = setuptools +known_first_party = mmseg +known_third_party = PIL,cityscapesscripts,cv2,detail,matplotlib,mmcv,numpy,onnxruntime,packaging,prettytable,pytest,pytorch_sphinx_theme,requests,scipy,seaborn,torch,ts +no_lines_before = STDLIB,LOCALFOLDER +default_section = THIRDPARTY + +# ignore-words-list needs to be lowercase format. For example, if we want to +# ignore word "BA", then we need to append "ba" to ignore-words-list rather +# than "BA" +[codespell] +skip = *.po,*.ts,*.ipynb +count = +quiet-level = 3 +ignore-words-list = formating,sur,hist,dota,ba diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/setup.py b/prediction/image/mx15hdi/Detect/mmsegmentation/setup.py new file mode 100644 index 0000000..ad09e6c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/setup.py @@ -0,0 +1,201 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os +import os.path as osp +import platform +import shutil +import sys +import warnings +from setuptools import find_packages, setup + + +def readme(): + with open('README.md', encoding='utf-8') as f: + content = f.read() + return content + + +version_file = 'mmseg/version.py' + + +def get_version(): + with open(version_file, 'r') as f: + exec(compile(f.read(), version_file, 'exec')) + return locals()['__version__'] + + +def parse_requirements(fname='requirements.txt', with_version=True): + """Parse the package dependencies listed in a requirements file but strips + specific versioning information. 
+ + Args: + fname (str): path to requirements file + with_version (bool, default=False): if True include version specs + + Returns: + List[str]: list of requirements items + + CommandLine: + python -c "import setup; print(setup.parse_requirements())" + """ + import re + import sys + from os.path import exists + require_fpath = fname + + def parse_line(line): + """Parse information from a line in a requirements text file.""" + if line.startswith('-r '): + # Allow specifying requirements in other files + target = line.split(' ')[1] + for info in parse_require_file(target): + yield info + else: + info = {'line': line} + if line.startswith('-e '): + info['package'] = line.split('#egg=')[1] + else: + # Remove versioning from the package + pat = '(' + '|'.join(['>=', '==', '>']) + ')' + parts = re.split(pat, line, maxsplit=1) + parts = [p.strip() for p in parts] + + info['package'] = parts[0] + if len(parts) > 1: + op, rest = parts[1:] + if ';' in rest: + # Handle platform specific dependencies + # http://setuptools.readthedocs.io/en/latest/setuptools.html#declaring-platform-specific-dependencies + version, platform_deps = map(str.strip, + rest.split(';')) + info['platform_deps'] = platform_deps + else: + version = rest # NOQA + info['version'] = (op, version) + yield info + + def parse_require_file(fpath): + with open(fpath, 'r') as f: + for line in f.readlines(): + line = line.strip() + if line and not line.startswith('#'): + for info in parse_line(line): + yield info + + def gen_packages_items(): + if exists(require_fpath): + for info in parse_require_file(require_fpath): + parts = [info['package']] + if with_version and 'version' in info: + parts.extend(info['version']) + if not sys.version.startswith('3.4'): + # apparently package_deps are broken in 3.4 + platform_deps = info.get('platform_deps') + if platform_deps is not None: + parts.append(';' + platform_deps) + item = ''.join(parts) + yield item + + packages = list(gen_packages_items()) + return packages + + +def add_mim_extension(): + """Add extra files that are required to support MIM into the package. + + These files will be added by creating a symlink to the originals if the + package is installed in `editable` mode (e.g. pip install -e .), or by + copying from the originals otherwise. + """ + + # parse installment mode + if 'develop' in sys.argv: + # installed by `pip install -e .` + if platform.system() == 'Windows': + # set `copy` mode here since symlink fails on Windows. + mode = 'copy' + else: + mode = 'symlink' + elif 'sdist' in sys.argv or 'bdist_wheel' in sys.argv or \ + platform.system() == 'Windows': + # installed by `pip install .` + # or create source distribution by `python setup.py sdist` + # set `copy` mode here since symlink fails with WinError on Windows. + mode = 'copy' + else: + return + + filenames = ['tools', 'configs', 'model-index.yml'] + repo_path = osp.dirname(__file__) + mim_path = osp.join(repo_path, 'mmseg', '.mim') + os.makedirs(mim_path, exist_ok=True) + + for filename in filenames: + if osp.exists(filename): + src_path = osp.join(repo_path, filename) + tar_path = osp.join(mim_path, filename) + + if osp.isfile(tar_path) or osp.islink(tar_path): + os.remove(tar_path) + elif osp.isdir(tar_path): + shutil.rmtree(tar_path) + + if mode == 'symlink': + src_relpath = osp.relpath(src_path, osp.dirname(tar_path)) + try: + os.symlink(src_relpath, tar_path) + except OSError: + # Creating a symbolic link on windows may raise an + # `OSError: [WinError 1314]` due to privilege. 
If + # the error happens, the src file will be copied + mode = 'copy' + warnings.warn( + f'Failed to create a symbolic link for {src_relpath}, ' + f'and it will be copied to {tar_path}') + else: + continue + + if mode == 'copy': + if osp.isfile(src_path): + shutil.copyfile(src_path, tar_path) + elif osp.isdir(src_path): + shutil.copytree(src_path, tar_path) + else: + warnings.warn(f'Cannot copy file {src_path}.') + else: + raise ValueError(f'Invalid mode {mode}') + + +if __name__ == '__main__': + add_mim_extension() + setup( + name='mmsegmentation', + version=get_version(), + description='Open MMLab Semantic Segmentation Toolbox and Benchmark', + long_description=readme(), + long_description_content_type='text/markdown', + author='MMSegmentation Contributors', + author_email='openmmlab@gmail.com', + keywords='computer vision, semantic segmentation', + url='http://github.com/open-mmlab/mmsegmentation', + packages=find_packages(exclude=('configs', 'tools', 'demo')), + include_package_data=True, + classifiers=[ + 'Development Status :: 4 - Beta', + 'License :: OSI Approved :: Apache Software License', + 'Operating System :: OS Independent', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + ], + license='Apache License 2.0', + install_requires=parse_requirements('requirements/runtime.txt'), + extras_require={ + 'all': parse_requirements('requirements.txt'), + 'tests': parse_requirements('requirements/tests.txt'), + 'build': parse_requirements('requirements/build.txt'), + 'optional': parse_requirements('requirements/optional.txt'), + 'mim': parse_requirements('requirements/mminstall.txt'), + }, + ext_modules=[], + zip_safe=False) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/__init__.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/__init__.py new file mode 100644 index 0000000..ef101fe --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/__init__.py @@ -0,0 +1 @@ +# Copyright (c) OpenMMLab. All rights reserved. diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_apis/test_single_gpu.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_apis/test_single_gpu.py new file mode 100644 index 0000000..0b484f2 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_apis/test_single_gpu.py @@ -0,0 +1,73 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import shutil
+from unittest.mock import MagicMock
+
+import numpy as np
+import pytest
+import torch
+import torch.nn as nn
+from torch.utils.data import DataLoader, Dataset
+
+from mmseg.apis import single_gpu_test
+
+
+class ExampleDataset(Dataset):
+
+    def __getitem__(self, idx):
+        results = dict(img=torch.tensor([1]), img_metas=dict())
+        return results
+
+    def __len__(self):
+        return 1
+
+
+class ExampleModel(nn.Module):
+
+    def __init__(self):
+        super(ExampleModel, self).__init__()
+        self.test_cfg = None
+        self.conv = nn.Conv2d(3, 3, 3)
+
+    def forward(self, img, img_metas, return_loss=False, **kwargs):
+        return img
+
+
+def test_single_gpu():
+    test_dataset = ExampleDataset()
+    data_loader = DataLoader(
+        test_dataset,
+        batch_size=1,
+        sampler=None,
+        num_workers=0,
+        shuffle=False,
+    )
+    model = ExampleModel()
+
+    # Test efficient test compatibility (will be deprecated)
+    results = single_gpu_test(model, data_loader, efficient_test=True)
+    assert len(results) == 1
+    pred = np.load(results[0])
+    assert isinstance(pred, np.ndarray)
+    assert pred.shape == (1, )
+    assert pred[0] == 1
+
+    shutil.rmtree('.efficient_test')
+
+    # Test pre_eval
+    test_dataset.pre_eval = MagicMock(return_value=['success'])
+    results = single_gpu_test(model, data_loader, pre_eval=True)
+    assert results == ['success']
+
+    # Test format_only
+    test_dataset.format_results = MagicMock(return_value=['success'])
+    results = single_gpu_test(model, data_loader, format_only=True)
+    assert results == ['success']
+
+    # efficient_test, pre_eval and format_only are mutually exclusive
+    with pytest.raises(AssertionError):
+        single_gpu_test(
+            model,
+            data_loader,
+            efficient_test=True,
+            format_only=True,
+            pre_eval=True)
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_config.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_config.py
new file mode 100644
index 0000000..2482144
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_config.py
@@ -0,0 +1,162 @@
+# Copyright (c) OpenMMLab. All rights reserved.
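+"""Tests that every model config under ``configs/`` builds.
+
+A sketch of the pattern exercised below (the config path is one example
+from the repo; any non-`_base_` config works the same way)::
+
+    from mmcv import Config
+    from mmseg.models import build_segmentor
+
+    cfg = Config.fromfile(
+        'configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py')
+    cfg.model.pretrained = None  # skip downloading weights in tests
+    segmentor = build_segmentor(cfg.model)
+"""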
+import glob +import os +from os.path import dirname, exists, isdir, join, relpath + +from mmcv import Config +from torch import nn + +from mmseg.models import build_segmentor + + +def _get_config_directory(): + """Find the predefined segmentor config directory.""" + try: + # Assume we are running in the source mmsegmentation repo + repo_dpath = dirname(dirname(__file__)) + except NameError: + # For IPython development when this __file__ is not defined + import mmseg + repo_dpath = dirname(dirname(mmseg.__file__)) + config_dpath = join(repo_dpath, 'configs') + if not exists(config_dpath): + raise Exception('Cannot find config path') + return config_dpath + + +def test_config_build_segmentor(): + """Test that all segmentation models defined in the configs can be + initialized.""" + config_dpath = _get_config_directory() + print('Found config_dpath = {!r}'.format(config_dpath)) + + config_fpaths = [] + # one config each sub folder + for sub_folder in os.listdir(config_dpath): + if isdir(sub_folder): + config_fpaths.append( + list(glob.glob(join(config_dpath, sub_folder, '*.py')))[0]) + config_fpaths = [p for p in config_fpaths if p.find('_base_') == -1] + config_names = [relpath(p, config_dpath) for p in config_fpaths] + + print('Using {} config files'.format(len(config_names))) + + for config_fname in config_names: + config_fpath = join(config_dpath, config_fname) + config_mod = Config.fromfile(config_fpath) + + config_mod.model + print('Building segmentor, config_fpath = {!r}'.format(config_fpath)) + + # Remove pretrained keys to allow for testing in an offline environment + if 'pretrained' in config_mod.model: + config_mod.model['pretrained'] = None + + print('building {}'.format(config_fname)) + segmentor = build_segmentor(config_mod.model) + assert segmentor is not None + + head_config = config_mod.model['decode_head'] + _check_decode_head(head_config, segmentor.decode_head) + + +def test_config_data_pipeline(): + """Test whether the data pipeline is valid and can process corner cases. 
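+
+    The convention assumed here: a synthetic uint8 image and a label map
+    are pushed through each pipeline with the loading transforms removed,
+    e.g.::
+
+        img = np.random.randint(0, 255, size=(1024, 2048, 3), dtype=np.uint8)
+        seg = np.random.randint(0, 255, size=(1024, 2048, 1), dtype=np.uint8)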
+
+    CommandLine:
+        xdoctest -m tests/test_config.py test_config_build_data_pipeline
+    """
+    import numpy as np
+    from mmcv import Config
+
+    from mmseg.datasets.pipelines import Compose
+
+    config_dpath = _get_config_directory()
+    print('Found config_dpath = {!r}'.format(config_dpath))
+
+    import glob
+    config_fpaths = list(glob.glob(join(config_dpath, '**', '*.py')))
+    config_fpaths = [p for p in config_fpaths if p.find('_base_') == -1]
+    config_names = [relpath(p, config_dpath) for p in config_fpaths]
+
+    print('Using {} config files'.format(len(config_names)))
+
+    for config_fname in config_names:
+        config_fpath = join(config_dpath, config_fname)
+        print(
+            'Building data pipeline, config_fpath = {!r}'.format(config_fpath))
+        config_mod = Config.fromfile(config_fpath)
+
+        # remove loading pipeline
+        load_img_pipeline = config_mod.train_pipeline.pop(0)
+        to_float32 = load_img_pipeline.get('to_float32', False)
+        config_mod.train_pipeline.pop(0)
+        config_mod.test_pipeline.pop(0)
+
+        train_pipeline = Compose(config_mod.train_pipeline)
+        test_pipeline = Compose(config_mod.test_pipeline)
+
+        img = np.random.randint(0, 255, size=(1024, 2048, 3), dtype=np.uint8)
+        if to_float32:
+            img = img.astype(np.float32)
+        seg = np.random.randint(0, 255, size=(1024, 2048, 1), dtype=np.uint8)
+
+        results = dict(
+            filename='test_img.png',
+            ori_filename='test_img.png',
+            img=img,
+            img_shape=img.shape,
+            ori_shape=img.shape,
+            gt_semantic_seg=seg)
+        results['seg_fields'] = ['gt_semantic_seg']
+
+        print('Test training data pipeline: \n{!r}'.format(train_pipeline))
+        output_results = train_pipeline(results)
+        assert output_results is not None
+
+        results = dict(
+            filename='test_img.png',
+            ori_filename='test_img.png',
+            img=img,
+            img_shape=img.shape,
+            ori_shape=img.shape,
+        )
+        print('Test testing data pipeline: \n{!r}'.format(test_pipeline))
+        output_results = test_pipeline(results)
+        assert output_results is not None
+
+
+def _check_decode_head(decode_head_cfg, decode_head):
+    if isinstance(decode_head_cfg, list):
+        assert isinstance(decode_head, nn.ModuleList)
+        assert len(decode_head_cfg) == len(decode_head)
+        num_heads = len(decode_head)
+        for i in range(num_heads):
+            _check_decode_head(decode_head_cfg[i], decode_head[i])
+        return
+    # check consistency between head_config and decode_head
+    assert decode_head_cfg['type'] == decode_head.__class__.__name__
+
+    in_channels = decode_head_cfg.in_channels
+    input_transform = decode_head.input_transform
+    assert input_transform in ['resize_concat', 'multiple_select', None]
+    if input_transform is not None:
+        assert isinstance(in_channels, (list, tuple))
+        assert isinstance(decode_head.in_index, (list, tuple))
+        assert len(in_channels) == len(decode_head.in_index)
+        if input_transform == 'resize_concat':
+            # resize_concat heads concatenate their inputs, so the head's
+            # in_channels equals the sum of the config's in_channels
+            assert sum(in_channels) == decode_head.in_channels
+    else:
+        assert isinstance(in_channels, int)
+        assert in_channels == decode_head.in_channels
+        assert isinstance(decode_head.in_index, int)
+
+    if decode_head_cfg['type'] == 'PointHead':
+        assert decode_head_cfg.channels + decode_head_cfg.num_classes == \
+            decode_head.fc_seg.in_channels
+        assert decode_head.fc_seg.out_channels == decode_head_cfg.num_classes
+    else:
+        assert decode_head_cfg.channels == decode_head.conv_seg.in_channels
+        assert decode_head.conv_seg.out_channels == decode_head_cfg.num_classes
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_core/test_layer_decay_optimizer_constructor.py
b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_core/test_layer_decay_optimizer_constructor.py new file mode 100644 index 0000000..4911f3b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_core/test_layer_decay_optimizer_constructor.py @@ -0,0 +1,275 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pytest +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule + +from mmseg.core.optimizers.layer_decay_optimizer_constructor import ( + LayerDecayOptimizerConstructor, LearningRateDecayOptimizerConstructor) + +base_lr = 1 +decay_rate = 2 +base_wd = 0.05 +weight_decay = 0.05 + +expected_stage_wise_lr_wd_convnext = [{ + 'weight_decay': 0.0, + 'lr_scale': 128 +}, { + 'weight_decay': 0.0, + 'lr_scale': 1 +}, { + 'weight_decay': 0.05, + 'lr_scale': 64 +}, { + 'weight_decay': 0.0, + 'lr_scale': 64 +}, { + 'weight_decay': 0.05, + 'lr_scale': 32 +}, { + 'weight_decay': 0.0, + 'lr_scale': 32 +}, { + 'weight_decay': 0.05, + 'lr_scale': 16 +}, { + 'weight_decay': 0.0, + 'lr_scale': 16 +}, { + 'weight_decay': 0.05, + 'lr_scale': 8 +}, { + 'weight_decay': 0.0, + 'lr_scale': 8 +}, { + 'weight_decay': 0.05, + 'lr_scale': 128 +}, { + 'weight_decay': 0.05, + 'lr_scale': 1 +}] + +expected_layer_wise_lr_wd_convnext = [{ + 'weight_decay': 0.0, + 'lr_scale': 128 +}, { + 'weight_decay': 0.0, + 'lr_scale': 1 +}, { + 'weight_decay': 0.05, + 'lr_scale': 64 +}, { + 'weight_decay': 0.0, + 'lr_scale': 64 +}, { + 'weight_decay': 0.05, + 'lr_scale': 32 +}, { + 'weight_decay': 0.0, + 'lr_scale': 32 +}, { + 'weight_decay': 0.05, + 'lr_scale': 16 +}, { + 'weight_decay': 0.0, + 'lr_scale': 16 +}, { + 'weight_decay': 0.05, + 'lr_scale': 2 +}, { + 'weight_decay': 0.0, + 'lr_scale': 2 +}, { + 'weight_decay': 0.05, + 'lr_scale': 128 +}, { + 'weight_decay': 0.05, + 'lr_scale': 1 +}] + +expected_layer_wise_wd_lr_beit = [{ + 'weight_decay': 0.0, + 'lr_scale': 16 +}, { + 'weight_decay': 0.05, + 'lr_scale': 8 +}, { + 'weight_decay': 0.0, + 'lr_scale': 8 +}, { + 'weight_decay': 0.05, + 'lr_scale': 4 +}, { + 'weight_decay': 0.0, + 'lr_scale': 4 +}, { + 'weight_decay': 0.05, + 'lr_scale': 2 +}, { + 'weight_decay': 0.0, + 'lr_scale': 2 +}, { + 'weight_decay': 0.05, + 'lr_scale': 1 +}, { + 'weight_decay': 0.0, + 'lr_scale': 1 +}] + + +class ToyConvNeXt(nn.Module): + + def __init__(self): + super().__init__() + self.stages = nn.ModuleList() + for i in range(4): + stage = nn.Sequential(ConvModule(3, 4, kernel_size=1, bias=True)) + self.stages.append(stage) + self.norm0 = nn.BatchNorm2d(2) + + # add some variables to meet unit test coverate rate + self.cls_token = nn.Parameter(torch.ones(1)) + self.mask_token = nn.Parameter(torch.ones(1)) + self.pos_embed = nn.Parameter(torch.ones(1)) + self.stem_norm = nn.Parameter(torch.ones(1)) + self.downsample_norm0 = nn.BatchNorm2d(2) + self.downsample_norm1 = nn.BatchNorm2d(2) + self.downsample_norm2 = nn.BatchNorm2d(2) + self.lin = nn.Parameter(torch.ones(1)) + self.lin.requires_grad = False + self.downsample_layers = nn.ModuleList() + for _ in range(4): + stage = nn.Sequential(nn.Conv2d(3, 4, kernel_size=1, bias=True)) + self.downsample_layers.append(stage) + + +class ToyBEiT(nn.Module): + + def __init__(self): + super().__init__() + # add some variables to meet unit test coverate rate + self.cls_token = nn.Parameter(torch.ones(1)) + self.patch_embed = nn.Parameter(torch.ones(1)) + self.layers = nn.ModuleList() + for _ in range(3): + layer = nn.Conv2d(3, 3, 1) + self.layers.append(layer) + + +class ToyMAE(nn.Module): + + def 
__init__(self): + super().__init__() + # add some variables to meet unit test coverate rate + self.cls_token = nn.Parameter(torch.ones(1)) + self.patch_embed = nn.Parameter(torch.ones(1)) + self.layers = nn.ModuleList() + for _ in range(3): + layer = nn.Conv2d(3, 3, 1) + self.layers.append(layer) + + +class ToySegmentor(nn.Module): + + def __init__(self, backbone): + super().__init__() + self.backbone = backbone + self.decode_head = nn.Conv2d(2, 2, kernel_size=1, groups=2) + + +class PseudoDataParallel(nn.Module): + + def __init__(self, model): + super().__init__() + self.module = model + + +class ToyViT(nn.Module): + + def __init__(self): + super().__init__() + + +def check_optimizer_lr_wd(optimizer, gt_lr_wd): + assert isinstance(optimizer, torch.optim.AdamW) + assert optimizer.defaults['lr'] == base_lr + assert optimizer.defaults['weight_decay'] == base_wd + param_groups = optimizer.param_groups + print(param_groups) + assert len(param_groups) == len(gt_lr_wd) + for i, param_dict in enumerate(param_groups): + assert param_dict['weight_decay'] == gt_lr_wd[i]['weight_decay'] + assert param_dict['lr_scale'] == gt_lr_wd[i]['lr_scale'] + assert param_dict['lr_scale'] == param_dict['lr'] + + +def test_learning_rate_decay_optimizer_constructor(): + + # Test lr wd for ConvNeXT + backbone = ToyConvNeXt() + model = PseudoDataParallel(ToySegmentor(backbone)) + optimizer_cfg = dict( + type='AdamW', lr=base_lr, betas=(0.9, 0.999), weight_decay=0.05) + # stagewise decay + stagewise_paramwise_cfg = dict( + decay_rate=decay_rate, decay_type='stage_wise', num_layers=6) + optim_constructor = LearningRateDecayOptimizerConstructor( + optimizer_cfg, stagewise_paramwise_cfg) + optimizer = optim_constructor(model) + check_optimizer_lr_wd(optimizer, expected_stage_wise_lr_wd_convnext) + # layerwise decay + layerwise_paramwise_cfg = dict( + decay_rate=decay_rate, decay_type='layer_wise', num_layers=6) + optim_constructor = LearningRateDecayOptimizerConstructor( + optimizer_cfg, layerwise_paramwise_cfg) + optimizer = optim_constructor(model) + check_optimizer_lr_wd(optimizer, expected_layer_wise_lr_wd_convnext) + + # Test lr wd for BEiT + backbone = ToyBEiT() + model = PseudoDataParallel(ToySegmentor(backbone)) + + layerwise_paramwise_cfg = dict( + decay_rate=decay_rate, decay_type='layer_wise', num_layers=3) + optim_constructor = LearningRateDecayOptimizerConstructor( + optimizer_cfg, layerwise_paramwise_cfg) + optimizer = optim_constructor(model) + check_optimizer_lr_wd(optimizer, expected_layer_wise_wd_lr_beit) + + # Test invalidation of lr wd for Vit + backbone = ToyViT() + model = PseudoDataParallel(ToySegmentor(backbone)) + with pytest.raises(NotImplementedError): + optim_constructor = LearningRateDecayOptimizerConstructor( + optimizer_cfg, layerwise_paramwise_cfg) + optimizer = optim_constructor(model) + with pytest.raises(NotImplementedError): + optim_constructor = LearningRateDecayOptimizerConstructor( + optimizer_cfg, stagewise_paramwise_cfg) + optimizer = optim_constructor(model) + + # Test lr wd for MAE + backbone = ToyMAE() + model = PseudoDataParallel(ToySegmentor(backbone)) + + layerwise_paramwise_cfg = dict( + decay_rate=decay_rate, decay_type='layer_wise', num_layers=3) + optim_constructor = LearningRateDecayOptimizerConstructor( + optimizer_cfg, layerwise_paramwise_cfg) + optimizer = optim_constructor(model) + check_optimizer_lr_wd(optimizer, expected_layer_wise_wd_lr_beit) + + +def test_beit_layer_decay_optimizer_constructor(): + + # paramwise_cfg with BEiTExampleModel + backbone = ToyBEiT() + 
model = PseudoDataParallel(ToySegmentor(backbone)) + optimizer_cfg = dict( + type='AdamW', lr=1, betas=(0.9, 0.999), weight_decay=0.05) + paramwise_cfg = dict(layer_decay_rate=2, num_layers=3) + optim_constructor = LayerDecayOptimizerConstructor(optimizer_cfg, + paramwise_cfg) + optimizer = optim_constructor(model) + check_optimizer_lr_wd(optimizer, expected_layer_wise_wd_lr_beit) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_core/test_optimizer.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_core/test_optimizer.py new file mode 100644 index 0000000..247f9fe --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_core/test_optimizer.py @@ -0,0 +1,59 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pytest +import torch +import torch.nn as nn +from mmcv.runner import DefaultOptimizerConstructor + +from mmseg.core.builder import (OPTIMIZER_BUILDERS, build_optimizer, + build_optimizer_constructor) + + +class ExampleModel(nn.Module): + + def __init__(self): + super().__init__() + self.param1 = nn.Parameter(torch.ones(1)) + self.conv1 = nn.Conv2d(3, 4, kernel_size=1, bias=False) + self.conv2 = nn.Conv2d(4, 2, kernel_size=1) + self.bn = nn.BatchNorm2d(2) + + def forward(self, x): + return x + + +base_lr = 0.01 +base_wd = 0.0001 +momentum = 0.9 + + +def test_build_optimizer_constructor(): + optimizer_cfg = dict( + type='SGD', lr=base_lr, weight_decay=base_wd, momentum=momentum) + optim_constructor_cfg = dict( + type='DefaultOptimizerConstructor', optimizer_cfg=optimizer_cfg) + optim_constructor = build_optimizer_constructor(optim_constructor_cfg) + # Test whether optimizer constructor can be built from parent. + assert type(optim_constructor) is DefaultOptimizerConstructor + + @OPTIMIZER_BUILDERS.register_module() + class MyOptimizerConstructor(DefaultOptimizerConstructor): + pass + + optim_constructor_cfg = dict( + type='MyOptimizerConstructor', optimizer_cfg=optimizer_cfg) + optim_constructor = build_optimizer_constructor(optim_constructor_cfg) + # Test optimizer constructor can be built from child registry. + assert type(optim_constructor) is MyOptimizerConstructor + + # Test unregistered constructor cannot be built + with pytest.raises(KeyError): + build_optimizer_constructor(dict(type='A')) + + +def test_build_optimizer(): + model = ExampleModel() + optimizer_cfg = dict( + type='SGD', lr=base_lr, weight_decay=base_wd, momentum=momentum) + optimizer = build_optimizer(model, optimizer_cfg) + # test whether optimizer is successfully built from parent. + assert isinstance(optimizer, torch.optim.SGD) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_data/test_dataset.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_data/test_dataset.py new file mode 100644 index 0000000..6ea6eb9 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_data/test_dataset.py @@ -0,0 +1,851 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
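+"""Tests for dataset classes, wrappers and evaluation.
+
+A sketch of the ``CustomDataset`` layout these tests assume; the fixture
+directories below live under ``tests/data`` and the suffixes match the
+pseudo dataset used throughout this file::
+
+    dataset = CustomDataset(
+        pipeline=[],
+        data_root='tests/data/pseudo_dataset',
+        img_dir='imgs/',
+        ann_dir='gts/',
+        img_suffix='img.jpg',
+        seg_map_suffix='gt.png')
+"""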
+import os +import os.path as osp +import shutil +import tempfile +from typing import Generator +from unittest.mock import MagicMock, patch + +import numpy as np +import pytest +import torch +from PIL import Image + +from mmseg.core.evaluation import get_classes, get_palette +from mmseg.datasets import (DATASETS, ADE20KDataset, CityscapesDataset, + COCOStuffDataset, ConcatDataset, CustomDataset, + ISPRSDataset, LoveDADataset, MultiImageMixDataset, + PascalVOCDataset, PotsdamDataset, RepeatDataset, + build_dataset, iSAIDDataset) + + +def test_classes(): + assert list(CityscapesDataset.CLASSES) == get_classes('cityscapes') + assert list(PascalVOCDataset.CLASSES) == get_classes('voc') == get_classes( + 'pascal_voc') + assert list( + ADE20KDataset.CLASSES) == get_classes('ade') == get_classes('ade20k') + assert list(COCOStuffDataset.CLASSES) == get_classes('cocostuff') + assert list(LoveDADataset.CLASSES) == get_classes('loveda') + assert list(PotsdamDataset.CLASSES) == get_classes('potsdam') + assert list(ISPRSDataset.CLASSES) == get_classes('vaihingen') + assert list(iSAIDDataset.CLASSES) == get_classes('isaid') + + with pytest.raises(ValueError): + get_classes('unsupported') + + +def test_classes_file_path(): + tmp_file = tempfile.NamedTemporaryFile() + classes_path = f'{tmp_file.name}.txt' + train_pipeline = [dict(type='LoadImageFromFile')] + kwargs = dict(pipeline=train_pipeline, img_dir='./', classes=classes_path) + + # classes.txt with full categories + categories = get_classes('cityscapes') + with open(classes_path, 'w') as f: + f.write('\n'.join(categories)) + assert list(CityscapesDataset(**kwargs).CLASSES) == categories + + # classes.txt with sub categories + categories = ['road', 'sidewalk', 'building'] + with open(classes_path, 'w') as f: + f.write('\n'.join(categories)) + assert list(CityscapesDataset(**kwargs).CLASSES) == categories + + # classes.txt with unknown categories + categories = ['road', 'sidewalk', 'unknown'] + with open(classes_path, 'w') as f: + f.write('\n'.join(categories)) + + with pytest.raises(ValueError): + CityscapesDataset(**kwargs) + + tmp_file.close() + os.remove(classes_path) + assert not osp.exists(classes_path) + + +def test_palette(): + assert CityscapesDataset.PALETTE == get_palette('cityscapes') + assert PascalVOCDataset.PALETTE == get_palette('voc') == get_palette( + 'pascal_voc') + assert ADE20KDataset.PALETTE == get_palette('ade') == get_palette('ade20k') + assert LoveDADataset.PALETTE == get_palette('loveda') + assert PotsdamDataset.PALETTE == get_palette('potsdam') + assert COCOStuffDataset.PALETTE == get_palette('cocostuff') + assert iSAIDDataset.PALETTE == get_palette('isaid') + + with pytest.raises(ValueError): + get_palette('unsupported') + + +@patch('mmseg.datasets.CustomDataset.load_annotations', MagicMock) +@patch('mmseg.datasets.CustomDataset.__getitem__', + MagicMock(side_effect=lambda idx: idx)) +def test_dataset_wrapper(): + # CustomDataset.load_annotations = MagicMock() + # CustomDataset.__getitem__ = MagicMock(side_effect=lambda idx: idx) + dataset_a = CustomDataset(img_dir=MagicMock(), pipeline=[]) + len_a = 10 + dataset_a.img_infos = MagicMock() + dataset_a.img_infos.__len__.return_value = len_a + dataset_b = CustomDataset(img_dir=MagicMock(), pipeline=[]) + len_b = 20 + dataset_b.img_infos = MagicMock() + dataset_b.img_infos.__len__.return_value = len_b + + concat_dataset = ConcatDataset([dataset_a, dataset_b]) + assert concat_dataset[5] == 5 + assert concat_dataset[25] == 15 + assert len(concat_dataset) == len(dataset_a) + 
len(dataset_b) + + repeat_dataset = RepeatDataset(dataset_a, 10) + assert repeat_dataset[5] == 5 + assert repeat_dataset[15] == 5 + assert repeat_dataset[27] == 7 + assert len(repeat_dataset) == 10 * len(dataset_a) + + img_scale = (60, 60) + pipeline = [ + dict(type='RandomMosaic', prob=1, img_scale=img_scale), + dict(type='RandomFlip', prob=0.5), + dict(type='Resize', img_scale=img_scale, keep_ratio=False), + ] + + CustomDataset.load_annotations = MagicMock() + results = [] + for _ in range(2): + height = np.random.randint(10, 30) + weight = np.random.randint(10, 30) + img = np.ones((height, weight, 3)) + gt_semantic_seg = np.random.randint(5, size=(height, weight)) + results.append(dict(gt_semantic_seg=gt_semantic_seg, img=img)) + + classes = ['0', '1', '2', '3', '4'] + palette = [(0, 0, 0), (1, 1, 1), (2, 2, 2), (3, 3, 3), (4, 4, 4)] + CustomDataset.__getitem__ = MagicMock(side_effect=lambda idx: results[idx]) + dataset_a = CustomDataset( + img_dir=MagicMock(), + pipeline=[], + test_mode=True, + classes=classes, + palette=palette) + len_a = 2 + dataset_a.img_infos = MagicMock() + dataset_a.img_infos.__len__.return_value = len_a + + multi_image_mix_dataset = MultiImageMixDataset(dataset_a, pipeline) + assert len(multi_image_mix_dataset) == len(dataset_a) + + for idx in range(len_a): + results_ = multi_image_mix_dataset[idx] + + # test skip_type_keys + multi_image_mix_dataset = MultiImageMixDataset( + dataset_a, pipeline, skip_type_keys=('RandomFlip')) + for idx in range(len_a): + results_ = multi_image_mix_dataset[idx] + assert results_['img'].shape == (img_scale[0], img_scale[1], 3) + + skip_type_keys = ('RandomFlip', 'Resize') + multi_image_mix_dataset.update_skip_type_keys(skip_type_keys) + for idx in range(len_a): + results_ = multi_image_mix_dataset[idx] + assert results_['img'].shape[:2] != img_scale + + # test pipeline + with pytest.raises(TypeError): + pipeline = [['Resize']] + multi_image_mix_dataset = MultiImageMixDataset(dataset_a, pipeline) + + +def test_custom_dataset(): + img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True) + crop_size = (512, 1024) + train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(128, 256), ratio_range=(0.5, 2.0)), + dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_semantic_seg']), + ] + test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(128, 256), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) + ] + + # with img_dir and ann_dir + train_dataset = CustomDataset( + train_pipeline, + data_root=osp.join(osp.dirname(__file__), '../data/pseudo_dataset'), + img_dir='imgs/', + ann_dir='gts/', + img_suffix='img.jpg', + seg_map_suffix='gt.png') + assert len(train_dataset) == 5 + + # with img_dir, ann_dir, split + train_dataset = CustomDataset( + train_pipeline, + data_root=osp.join(osp.dirname(__file__), '../data/pseudo_dataset'), + img_dir='imgs/', + ann_dir='gts/', + img_suffix='img.jpg', + 
seg_map_suffix='gt.png', + split='splits/train.txt') + assert len(train_dataset) == 4 + + # no data_root + train_dataset = CustomDataset( + train_pipeline, + img_dir=osp.join(osp.dirname(__file__), '../data/pseudo_dataset/imgs'), + ann_dir=osp.join(osp.dirname(__file__), '../data/pseudo_dataset/gts'), + img_suffix='img.jpg', + seg_map_suffix='gt.png') + assert len(train_dataset) == 5 + + # with data_root but img_dir/ann_dir are abs path + train_dataset = CustomDataset( + train_pipeline, + data_root=osp.join(osp.dirname(__file__), '../data/pseudo_dataset'), + img_dir=osp.abspath( + osp.join(osp.dirname(__file__), '../data/pseudo_dataset/imgs')), + ann_dir=osp.abspath( + osp.join(osp.dirname(__file__), '../data/pseudo_dataset/gts')), + img_suffix='img.jpg', + seg_map_suffix='gt.png') + assert len(train_dataset) == 5 + + # test_mode=True + test_dataset = CustomDataset( + test_pipeline, + img_dir=osp.join(osp.dirname(__file__), '../data/pseudo_dataset/imgs'), + img_suffix='img.jpg', + test_mode=True, + classes=('pseudo_class', )) + assert len(test_dataset) == 5 + + # training data get + train_data = train_dataset[0] + assert isinstance(train_data, dict) + + # test data get + test_data = test_dataset[0] + assert isinstance(test_data, dict) + + # get gt seg map + gt_seg_maps = train_dataset.get_gt_seg_maps(efficient_test=True) + assert isinstance(gt_seg_maps, Generator) + gt_seg_maps = list(gt_seg_maps) + assert len(gt_seg_maps) == 5 + + # format_results not implemented + with pytest.raises(NotImplementedError): + test_dataset.format_results([], '') + + pseudo_results = [] + for gt_seg_map in gt_seg_maps: + h, w = gt_seg_map.shape + pseudo_results.append(np.random.randint(low=0, high=7, size=(h, w))) + + # test past evaluation without CLASSES + with pytest.raises(TypeError): + eval_results = train_dataset.evaluate(pseudo_results, metric=['mIoU']) + + with pytest.raises(TypeError): + eval_results = train_dataset.evaluate(pseudo_results, metric='mDice') + + with pytest.raises(TypeError): + eval_results = train_dataset.evaluate( + pseudo_results, metric=['mDice', 'mIoU']) + + # test past evaluation with CLASSES + train_dataset.CLASSES = tuple(['a'] * 7) + eval_results = train_dataset.evaluate(pseudo_results, metric='mIoU') + assert isinstance(eval_results, dict) + assert 'mIoU' in eval_results + assert 'mAcc' in eval_results + assert 'aAcc' in eval_results + + eval_results = train_dataset.evaluate(pseudo_results, metric='mDice') + assert isinstance(eval_results, dict) + assert 'mDice' in eval_results + assert 'mAcc' in eval_results + assert 'aAcc' in eval_results + + eval_results = train_dataset.evaluate(pseudo_results, metric='mFscore') + assert isinstance(eval_results, dict) + assert 'mRecall' in eval_results + assert 'mPrecision' in eval_results + assert 'mFscore' in eval_results + assert 'aAcc' in eval_results + + eval_results = train_dataset.evaluate( + pseudo_results, metric=['mIoU', 'mDice', 'mFscore']) + assert isinstance(eval_results, dict) + assert 'mIoU' in eval_results + assert 'mDice' in eval_results + assert 'mAcc' in eval_results + assert 'aAcc' in eval_results + assert 'mFscore' in eval_results + assert 'mPrecision' in eval_results + assert 'mRecall' in eval_results + + assert not np.isnan(eval_results['mIoU']) + assert not np.isnan(eval_results['mDice']) + assert not np.isnan(eval_results['mAcc']) + assert not np.isnan(eval_results['aAcc']) + assert not np.isnan(eval_results['mFscore']) + assert not np.isnan(eval_results['mPrecision']) + assert not np.isnan(eval_results['mRecall']) 
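+
+    # The pre-eval protocol, sketched: `pre_eval` returns per-image
+    # confusion statistics which `evaluate` later reduces to metrics,
+    # e.g. (names as above):
+    #
+    #     stats = train_dataset.pre_eval(pred, idx)
+    #     metrics = train_dataset.evaluate(stats, metric='mIoU')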
+ + # test evaluation with pre-eval and the dataset.CLASSES is necessary + train_dataset.CLASSES = tuple(['a'] * 7) + pseudo_results = [] + for idx in range(len(train_dataset)): + h, w = gt_seg_maps[idx].shape + pseudo_result = np.random.randint(low=0, high=7, size=(h, w)) + pseudo_results.extend(train_dataset.pre_eval(pseudo_result, idx)) + eval_results = train_dataset.evaluate(pseudo_results, metric=['mIoU']) + assert isinstance(eval_results, dict) + assert 'mIoU' in eval_results + assert 'mAcc' in eval_results + assert 'aAcc' in eval_results + + eval_results = train_dataset.evaluate(pseudo_results, metric='mDice') + assert isinstance(eval_results, dict) + assert 'mDice' in eval_results + assert 'mAcc' in eval_results + assert 'aAcc' in eval_results + + eval_results = train_dataset.evaluate(pseudo_results, metric='mFscore') + assert isinstance(eval_results, dict) + assert 'mRecall' in eval_results + assert 'mPrecision' in eval_results + assert 'mFscore' in eval_results + assert 'aAcc' in eval_results + + eval_results = train_dataset.evaluate( + pseudo_results, metric=['mIoU', 'mDice', 'mFscore']) + assert isinstance(eval_results, dict) + assert 'mIoU' in eval_results + assert 'mDice' in eval_results + assert 'mAcc' in eval_results + assert 'aAcc' in eval_results + assert 'mFscore' in eval_results + assert 'mPrecision' in eval_results + assert 'mRecall' in eval_results + + assert not np.isnan(eval_results['mIoU']) + assert not np.isnan(eval_results['mDice']) + assert not np.isnan(eval_results['mAcc']) + assert not np.isnan(eval_results['aAcc']) + assert not np.isnan(eval_results['mFscore']) + assert not np.isnan(eval_results['mPrecision']) + assert not np.isnan(eval_results['mRecall']) + + +@pytest.mark.parametrize('separate_eval', [True, False]) +def test_eval_concat_custom_dataset(separate_eval): + img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True) + test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiScaleFlipAug', + img_scale=(128, 256), + # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75], + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) + ] + data_root = osp.join(osp.dirname(__file__), '../data/pseudo_dataset') + img_dir = 'imgs/' + ann_dir = 'gts/' + + cfg1 = dict( + type='CustomDataset', + pipeline=test_pipeline, + data_root=data_root, + img_dir=img_dir, + ann_dir=ann_dir, + img_suffix='img.jpg', + seg_map_suffix='gt.png', + classes=tuple(['a'] * 7)) + dataset1 = build_dataset(cfg1) + assert len(dataset1) == 5 + # get gt seg map + gt_seg_maps = dataset1.get_gt_seg_maps(efficient_test=True) + assert isinstance(gt_seg_maps, Generator) + gt_seg_maps = list(gt_seg_maps) + assert len(gt_seg_maps) == 5 + + # test past evaluation + pseudo_results = [] + for gt_seg_map in gt_seg_maps: + h, w = gt_seg_map.shape + pseudo_results.append(np.random.randint(low=0, high=7, size=(h, w))) + eval_results1 = dataset1.evaluate( + pseudo_results, metric=['mIoU', 'mDice', 'mFscore']) + + # We use same dir twice for simplicity + # with ann_dir + cfg2 = dict( + type='CustomDataset', + pipeline=test_pipeline, + data_root=data_root, + img_dir=[img_dir, img_dir], + ann_dir=[ann_dir, ann_dir], + img_suffix='img.jpg', + seg_map_suffix='gt.png', + classes=tuple(['a'] * 7), + separate_eval=separate_eval) + dataset2 = build_dataset(cfg2) + assert isinstance(dataset2, 
ConcatDataset) + assert len(dataset2) == 10 + + eval_results2 = dataset2.evaluate( + pseudo_results * 2, metric=['mIoU', 'mDice', 'mFscore']) + + if separate_eval: + assert eval_results1['mIoU'] == eval_results2[ + '0_mIoU'] == eval_results2['1_mIoU'] + assert eval_results1['mDice'] == eval_results2[ + '0_mDice'] == eval_results2['1_mDice'] + assert eval_results1['mAcc'] == eval_results2[ + '0_mAcc'] == eval_results2['1_mAcc'] + assert eval_results1['aAcc'] == eval_results2[ + '0_aAcc'] == eval_results2['1_aAcc'] + assert eval_results1['mFscore'] == eval_results2[ + '0_mFscore'] == eval_results2['1_mFscore'] + assert eval_results1['mPrecision'] == eval_results2[ + '0_mPrecision'] == eval_results2['1_mPrecision'] + assert eval_results1['mRecall'] == eval_results2[ + '0_mRecall'] == eval_results2['1_mRecall'] + else: + assert eval_results1['mIoU'] == eval_results2['mIoU'] + assert eval_results1['mDice'] == eval_results2['mDice'] + assert eval_results1['mAcc'] == eval_results2['mAcc'] + assert eval_results1['aAcc'] == eval_results2['aAcc'] + assert eval_results1['mFscore'] == eval_results2['mFscore'] + assert eval_results1['mPrecision'] == eval_results2['mPrecision'] + assert eval_results1['mRecall'] == eval_results2['mRecall'] + + # test get dataset_idx and sample_idx from ConcateDataset + dataset_idx, sample_idx = dataset2.get_dataset_idx_and_sample_idx(3) + assert dataset_idx == 0 + assert sample_idx == 3 + + dataset_idx, sample_idx = dataset2.get_dataset_idx_and_sample_idx(7) + assert dataset_idx == 1 + assert sample_idx == 2 + + dataset_idx, sample_idx = dataset2.get_dataset_idx_and_sample_idx(-7) + assert dataset_idx == 0 + assert sample_idx == 3 + + # test negative indice exceed length of dataset + with pytest.raises(ValueError): + dataset_idx, sample_idx = dataset2.get_dataset_idx_and_sample_idx(-11) + + # test negative indice value + indice = -6 + dataset_idx1, sample_idx1 = dataset2.get_dataset_idx_and_sample_idx(indice) + dataset_idx2, sample_idx2 = dataset2.get_dataset_idx_and_sample_idx( + len(dataset2) + indice) + assert dataset_idx1 == dataset_idx2 + assert sample_idx1 == sample_idx2 + + # test evaluation with pre-eval and the dataset.CLASSES is necessary + pseudo_results = [] + eval_results1 = [] + for idx in range(len(dataset1)): + h, w = gt_seg_maps[idx].shape + pseudo_result = np.random.randint(low=0, high=7, size=(h, w)) + pseudo_results.append(pseudo_result) + eval_results1.extend(dataset1.pre_eval(pseudo_result, idx)) + + assert len(eval_results1) == len(dataset1) + assert isinstance(eval_results1[0], tuple) + assert len(eval_results1[0]) == 4 + assert isinstance(eval_results1[0][0], torch.Tensor) + + eval_results1 = dataset1.evaluate( + eval_results1, metric=['mIoU', 'mDice', 'mFscore']) + + pseudo_results = pseudo_results * 2 + eval_results2 = [] + for idx in range(len(dataset2)): + eval_results2.extend(dataset2.pre_eval(pseudo_results[idx], idx)) + + assert len(eval_results2) == len(dataset2) + assert isinstance(eval_results2[0], tuple) + assert len(eval_results2[0]) == 4 + assert isinstance(eval_results2[0][0], torch.Tensor) + + eval_results2 = dataset2.evaluate( + eval_results2, metric=['mIoU', 'mDice', 'mFscore']) + + if separate_eval: + assert eval_results1['mIoU'] == eval_results2[ + '0_mIoU'] == eval_results2['1_mIoU'] + assert eval_results1['mDice'] == eval_results2[ + '0_mDice'] == eval_results2['1_mDice'] + assert eval_results1['mAcc'] == eval_results2[ + '0_mAcc'] == eval_results2['1_mAcc'] + assert eval_results1['aAcc'] == eval_results2[ + '0_aAcc'] == 
eval_results2['1_aAcc'] + assert eval_results1['mFscore'] == eval_results2[ + '0_mFscore'] == eval_results2['1_mFscore'] + assert eval_results1['mPrecision'] == eval_results2[ + '0_mPrecision'] == eval_results2['1_mPrecision'] + assert eval_results1['mRecall'] == eval_results2[ + '0_mRecall'] == eval_results2['1_mRecall'] + else: + assert eval_results1['mIoU'] == eval_results2['mIoU'] + assert eval_results1['mDice'] == eval_results2['mDice'] + assert eval_results1['mAcc'] == eval_results2['mAcc'] + assert eval_results1['aAcc'] == eval_results2['aAcc'] + assert eval_results1['mFscore'] == eval_results2['mFscore'] + assert eval_results1['mPrecision'] == eval_results2['mPrecision'] + assert eval_results1['mRecall'] == eval_results2['mRecall'] + + # test batch_indices for pre eval + eval_results2 = dataset2.pre_eval(pseudo_results, + list(range(len(pseudo_results)))) + + assert len(eval_results2) == len(dataset2) + assert isinstance(eval_results2[0], tuple) + assert len(eval_results2[0]) == 4 + assert isinstance(eval_results2[0][0], torch.Tensor) + + eval_results2 = dataset2.evaluate( + eval_results2, metric=['mIoU', 'mDice', 'mFscore']) + + if separate_eval: + assert eval_results1['mIoU'] == eval_results2[ + '0_mIoU'] == eval_results2['1_mIoU'] + assert eval_results1['mDice'] == eval_results2[ + '0_mDice'] == eval_results2['1_mDice'] + assert eval_results1['mAcc'] == eval_results2[ + '0_mAcc'] == eval_results2['1_mAcc'] + assert eval_results1['aAcc'] == eval_results2[ + '0_aAcc'] == eval_results2['1_aAcc'] + assert eval_results1['mFscore'] == eval_results2[ + '0_mFscore'] == eval_results2['1_mFscore'] + assert eval_results1['mPrecision'] == eval_results2[ + '0_mPrecision'] == eval_results2['1_mPrecision'] + assert eval_results1['mRecall'] == eval_results2[ + '0_mRecall'] == eval_results2['1_mRecall'] + else: + assert eval_results1['mIoU'] == eval_results2['mIoU'] + assert eval_results1['mDice'] == eval_results2['mDice'] + assert eval_results1['mAcc'] == eval_results2['mAcc'] + assert eval_results1['aAcc'] == eval_results2['aAcc'] + assert eval_results1['mFscore'] == eval_results2['mFscore'] + assert eval_results1['mPrecision'] == eval_results2['mPrecision'] + assert eval_results1['mRecall'] == eval_results2['mRecall'] + + +def test_ade(): + test_dataset = ADE20KDataset( + pipeline=[], + img_dir=osp.join(osp.dirname(__file__), '../data/pseudo_dataset/imgs')) + assert len(test_dataset) == 5 + + # Test format_results + pseudo_results = [] + for _ in range(len(test_dataset)): + h, w = (2, 2) + pseudo_results.append(np.random.randint(low=0, high=7, size=(h, w))) + + file_paths = test_dataset.format_results(pseudo_results, '.format_ade') + assert len(file_paths) == len(test_dataset) + temp = np.array(Image.open(file_paths[0])) + assert np.allclose(temp, pseudo_results[0] + 1) + + shutil.rmtree('.format_ade') + + +@pytest.mark.parametrize('separate_eval', [True, False]) +def test_concat_ade(separate_eval): + test_dataset = ADE20KDataset( + pipeline=[], + img_dir=osp.join(osp.dirname(__file__), '../data/pseudo_dataset/imgs')) + assert len(test_dataset) == 5 + + concat_dataset = ConcatDataset([test_dataset, test_dataset], + separate_eval=separate_eval) + assert len(concat_dataset) == 10 + # Test format_results + pseudo_results = [] + for _ in range(len(concat_dataset)): + h, w = (2, 2) + pseudo_results.append(np.random.randint(low=0, high=7, size=(h, w))) + + # test format per image + file_paths = [] + for i in range(len(pseudo_results)): + file_paths.extend( + 
concat_dataset.format_results([pseudo_results[i]], + '.format_ade', + indices=[i])) + assert len(file_paths) == len(concat_dataset) + temp = np.array(Image.open(file_paths[0])) + assert np.allclose(temp, pseudo_results[0] + 1) + + shutil.rmtree('.format_ade') + + # test default argument + file_paths = concat_dataset.format_results(pseudo_results, '.format_ade') + assert len(file_paths) == len(concat_dataset) + temp = np.array(Image.open(file_paths[0])) + assert np.allclose(temp, pseudo_results[0] + 1) + + shutil.rmtree('.format_ade') + + +def test_cityscapes(): + test_dataset = CityscapesDataset( + pipeline=[], + img_dir=osp.join( + osp.dirname(__file__), + '../data/pseudo_cityscapes_dataset/leftImg8bit'), + ann_dir=osp.join( + osp.dirname(__file__), '../data/pseudo_cityscapes_dataset/gtFine')) + assert len(test_dataset) == 1 + + gt_seg_maps = list(test_dataset.get_gt_seg_maps()) + + # Test format_results + pseudo_results = [] + for idx in range(len(test_dataset)): + h, w = gt_seg_maps[idx].shape + pseudo_results.append(np.random.randint(low=0, high=19, size=(h, w))) + + file_paths = test_dataset.format_results(pseudo_results, '.format_city') + assert len(file_paths) == len(test_dataset) + temp = np.array(Image.open(file_paths[0])) + assert np.allclose(temp, + test_dataset._convert_to_label_id(pseudo_results[0])) + + # Test cityscapes evaluate + + test_dataset.evaluate( + pseudo_results, metric='cityscapes', imgfile_prefix='.format_city') + + shutil.rmtree('.format_city') + + +@pytest.mark.parametrize('separate_eval', [True, False]) +def test_concat_cityscapes(separate_eval): + cityscape_dataset = CityscapesDataset( + pipeline=[], + img_dir=osp.join( + osp.dirname(__file__), + '../data/pseudo_cityscapes_dataset/leftImg8bit'), + ann_dir=osp.join( + osp.dirname(__file__), '../data/pseudo_cityscapes_dataset/gtFine')) + assert len(cityscape_dataset) == 1 + with pytest.raises(NotImplementedError): + _ = ConcatDataset([cityscape_dataset, cityscape_dataset], + separate_eval=separate_eval) + ade_dataset = ADE20KDataset( + pipeline=[], + img_dir=osp.join(osp.dirname(__file__), '../data/pseudo_dataset/imgs')) + assert len(ade_dataset) == 5 + with pytest.raises(NotImplementedError): + _ = ConcatDataset([cityscape_dataset, ade_dataset], + separate_eval=separate_eval) + + +def test_loveda(): + test_dataset = LoveDADataset( + pipeline=[], + img_dir=osp.join( + osp.dirname(__file__), '../data/pseudo_loveda_dataset/img_dir'), + ann_dir=osp.join( + osp.dirname(__file__), '../data/pseudo_loveda_dataset/ann_dir')) + assert len(test_dataset) == 3 + + gt_seg_maps = list(test_dataset.get_gt_seg_maps()) + + # Test format_results + pseudo_results = [] + for idx in range(len(test_dataset)): + h, w = gt_seg_maps[idx].shape + pseudo_results.append(np.random.randint(low=0, high=7, size=(h, w))) + file_paths = test_dataset.format_results(pseudo_results, '.format_loveda') + assert len(file_paths) == len(test_dataset) + # Test loveda evaluate + + test_dataset.evaluate( + pseudo_results, metric='mIoU', imgfile_prefix='.format_loveda') + + shutil.rmtree('.format_loveda') + + +def test_potsdam(): + test_dataset = PotsdamDataset( + pipeline=[], + img_dir=osp.join( + osp.dirname(__file__), '../data/pseudo_potsdam_dataset/img_dir'), + ann_dir=osp.join( + osp.dirname(__file__), '../data/pseudo_potsdam_dataset/ann_dir')) + assert len(test_dataset) == 1 + + +def test_vaihingen(): + test_dataset = ISPRSDataset( + pipeline=[], + img_dir=osp.join( + osp.dirname(__file__), '../data/pseudo_vaihingen_dataset/img_dir'), + 
ann_dir=osp.join( + osp.dirname(__file__), '../data/pseudo_vaihingen_dataset/ann_dir')) + assert len(test_dataset) == 1 + + +def test_isaid(): + test_dataset = iSAIDDataset( + pipeline=[], + img_dir=osp.join( + osp.dirname(__file__), '../data/pseudo_isaid_dataset/img_dir'), + ann_dir=osp.join( + osp.dirname(__file__), '../data/pseudo_isaid_dataset/ann_dir')) + assert len(test_dataset) == 2 + isaid_info = test_dataset.load_annotations( + img_dir=osp.join( + osp.dirname(__file__), '../data/pseudo_isaid_dataset/img_dir'), + img_suffix='.png', + ann_dir=osp.join( + osp.dirname(__file__), '../data/pseudo_isaid_dataset/ann_dir'), + seg_map_suffix='.png', + split=osp.join( + osp.dirname(__file__), + '../data/pseudo_isaid_dataset/splits/train.txt')) + assert len(isaid_info) == 1 + + +@patch('mmseg.datasets.CustomDataset.load_annotations', MagicMock) +@patch('mmseg.datasets.CustomDataset.__getitem__', + MagicMock(side_effect=lambda idx: idx)) +@pytest.mark.parametrize('dataset, classes', [ + ('ADE20KDataset', ('wall', 'building')), + ('CityscapesDataset', ('road', 'sidewalk')), + ('CustomDataset', ('bus', 'car')), + ('PascalVOCDataset', ('aeroplane', 'bicycle')), +]) +def test_custom_classes_override_default(dataset, classes): + + dataset_class = DATASETS.get(dataset) + + original_classes = dataset_class.CLASSES + + # Test setting classes as a tuple + custom_dataset = dataset_class( + pipeline=[], + img_dir=MagicMock(), + split=MagicMock(), + classes=classes, + test_mode=True) + + assert custom_dataset.CLASSES != original_classes + assert custom_dataset.CLASSES == classes + + # Test setting classes as a list + custom_dataset = dataset_class( + pipeline=[], + img_dir=MagicMock(), + split=MagicMock(), + classes=list(classes), + test_mode=True) + + assert custom_dataset.CLASSES != original_classes + assert custom_dataset.CLASSES == list(classes) + + # Test overriding not a subset + custom_dataset = dataset_class( + pipeline=[], + img_dir=MagicMock(), + split=MagicMock(), + classes=[classes[0]], + test_mode=True) + + assert custom_dataset.CLASSES != original_classes + assert custom_dataset.CLASSES == [classes[0]] + + # Test default behavior + if dataset_class is CustomDataset: + with pytest.raises(AssertionError): + custom_dataset = dataset_class( + pipeline=[], + img_dir=MagicMock(), + split=MagicMock(), + classes=None, + test_mode=True) + else: + custom_dataset = dataset_class( + pipeline=[], + img_dir=MagicMock(), + split=MagicMock(), + classes=None, + test_mode=True) + + assert custom_dataset.CLASSES == original_classes + + +@patch('mmseg.datasets.CustomDataset.load_annotations', MagicMock) +@patch('mmseg.datasets.CustomDataset.__getitem__', + MagicMock(side_effect=lambda idx: idx)) +def test_custom_dataset_random_palette_is_generated(): + dataset = CustomDataset( + pipeline=[], + img_dir=MagicMock(), + split=MagicMock(), + classes=('bus', 'car'), + test_mode=True) + assert len(dataset.PALETTE) == 2 + for class_color in dataset.PALETTE: + assert len(class_color) == 3 + assert all(x >= 0 and x <= 255 for x in class_color) + + +@patch('mmseg.datasets.CustomDataset.load_annotations', MagicMock) +@patch('mmseg.datasets.CustomDataset.__getitem__', + MagicMock(side_effect=lambda idx: idx)) +def test_custom_dataset_custom_palette(): + dataset = CustomDataset( + pipeline=[], + img_dir=MagicMock(), + split=MagicMock(), + classes=('bus', 'car'), + palette=[[100, 100, 100], [200, 200, 200]], + test_mode=True) + assert tuple(dataset.PALETTE) == tuple([[100, 100, 100], [200, 200, 200]]) diff --git 
a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_data/test_dataset_builder.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_data/test_dataset_builder.py
new file mode 100644
index 0000000..30910b0
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_data/test_dataset_builder.py
@@ -0,0 +1,200 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import math
+import os.path as osp
+
+import pytest
+from torch.utils.data import (DistributedSampler, RandomSampler,
+                              SequentialSampler)
+
+from mmseg.datasets import (DATASETS, ConcatDataset, MultiImageMixDataset,
+                            build_dataloader, build_dataset)
+
+
+@DATASETS.register_module()
+class ToyDataset(object):
+
+    def __init__(self, cnt=0):
+        self.cnt = cnt
+
+    def __getitem__(self, idx):
+        return idx
+
+    def __len__(self):
+        return 100
+
+
+def test_build_dataset():
+    cfg = dict(type='ToyDataset')
+    dataset = build_dataset(cfg)
+    assert isinstance(dataset, ToyDataset)
+    assert dataset.cnt == 0
+    dataset = build_dataset(cfg, default_args=dict(cnt=1))
+    assert isinstance(dataset, ToyDataset)
+    assert dataset.cnt == 1
+
+    data_root = osp.join(osp.dirname(__file__), '../data/pseudo_dataset')
+    img_dir = 'imgs/'
+    ann_dir = 'gts/'
+
+    # We use the same dir twice for simplicity
+    # with ann_dir
+    cfg = dict(
+        type='CustomDataset',
+        pipeline=[],
+        data_root=data_root,
+        img_dir=[img_dir, img_dir],
+        ann_dir=[ann_dir, ann_dir])
+    dataset = build_dataset(cfg)
+    assert isinstance(dataset, ConcatDataset)
+    assert len(dataset) == 10
+
+    cfg = dict(type='MultiImageMixDataset', dataset=cfg, pipeline=[])
+    dataset = build_dataset(cfg)
+    assert isinstance(dataset, MultiImageMixDataset)
+    assert len(dataset) == 10
+
+    # with ann_dir, split
+    cfg = dict(
+        type='CustomDataset',
+        pipeline=[],
+        data_root=data_root,
+        img_dir=img_dir,
+        ann_dir=ann_dir,
+        split=['splits/train.txt', 'splits/val.txt'])
+    dataset = build_dataset(cfg)
+    assert isinstance(dataset, ConcatDataset)
+    assert len(dataset) == 5
+
+    # with ann_dir (as a list), split
+    cfg = dict(
+        type='CustomDataset',
+        pipeline=[],
+        data_root=data_root,
+        img_dir=img_dir,
+        ann_dir=[ann_dir, ann_dir],
+        split=['splits/train.txt', 'splits/val.txt'])
+    dataset = build_dataset(cfg)
+    assert isinstance(dataset, ConcatDataset)
+    assert len(dataset) == 5
+
+    # test mode
+    cfg = dict(
+        type='CustomDataset',
+        pipeline=[],
+        data_root=data_root,
+        img_dir=[img_dir, img_dir],
+        test_mode=True,
+        classes=('pseudo_class', ))
+    dataset = build_dataset(cfg)
+    assert isinstance(dataset, ConcatDataset)
+    assert len(dataset) == 10
+
+    # test mode with splits
+    cfg = dict(
+        type='CustomDataset',
+        pipeline=[],
+        data_root=data_root,
+        img_dir=[img_dir, img_dir],
+        split=['splits/val.txt', 'splits/val.txt'],
+        test_mode=True,
+        classes=('pseudo_class', ))
+    dataset = build_dataset(cfg)
+    assert isinstance(dataset, ConcatDataset)
+    assert len(dataset) == 2
+
+    # len(ann_dir) should be zero or len(img_dir) when len(img_dir) > 1
+    with pytest.raises(AssertionError):
+        cfg = dict(
+            type='CustomDataset',
+            pipeline=[],
+            data_root=data_root,
+            img_dir=[img_dir, img_dir],
+            ann_dir=[ann_dir, ann_dir, ann_dir])
+        build_dataset(cfg)
+
+    # len(splits) should be zero or len(img_dir) when len(img_dir) > 1
+    with pytest.raises(AssertionError):
+        cfg = dict(
+            type='CustomDataset',
+            pipeline=[],
+            data_root=data_root,
+            img_dir=[img_dir, img_dir],
+            split=['splits/val.txt', 'splits/val.txt', 'splits/val.txt'])
+        build_dataset(cfg)
+
+    # len(splits) == len(ann_dir) when only len(img_dir) == 1 and len( +
# ann_dir) > 1 + with pytest.raises(AssertionError): + cfg = dict( + type='CustomDataset', + pipeline=[], + data_root=data_root, + img_dir=img_dir, + ann_dir=[ann_dir, ann_dir], + split=['splits/val.txt', 'splits/val.txt', 'splits/val.txt']) + build_dataset(cfg) + + +def test_build_dataloader(): + dataset = ToyDataset() + samples_per_gpu = 3 + # dist=True, shuffle=True, 1GPU + dataloader = build_dataloader( + dataset, samples_per_gpu=samples_per_gpu, workers_per_gpu=2) + assert dataloader.batch_size == samples_per_gpu + assert len(dataloader) == int(math.ceil(len(dataset) / samples_per_gpu)) + assert isinstance(dataloader.sampler, DistributedSampler) + assert dataloader.sampler.shuffle + + # dist=True, shuffle=False, 1GPU + dataloader = build_dataloader( + dataset, + samples_per_gpu=samples_per_gpu, + workers_per_gpu=2, + shuffle=False) + assert dataloader.batch_size == samples_per_gpu + assert len(dataloader) == int(math.ceil(len(dataset) / samples_per_gpu)) + assert isinstance(dataloader.sampler, DistributedSampler) + assert not dataloader.sampler.shuffle + + # dist=True, shuffle=True, 8GPU + dataloader = build_dataloader( + dataset, + samples_per_gpu=samples_per_gpu, + workers_per_gpu=2, + num_gpus=8) + assert dataloader.batch_size == samples_per_gpu + assert len(dataloader) == int(math.ceil(len(dataset) / samples_per_gpu)) + assert dataloader.num_workers == 2 + + # dist=False, shuffle=True, 1GPU + dataloader = build_dataloader( + dataset, + samples_per_gpu=samples_per_gpu, + workers_per_gpu=2, + dist=False) + assert dataloader.batch_size == samples_per_gpu + assert len(dataloader) == int(math.ceil(len(dataset) / samples_per_gpu)) + assert isinstance(dataloader.sampler, RandomSampler) + assert dataloader.num_workers == 2 + + # dist=False, shuffle=False, 1GPU + dataloader = build_dataloader( + dataset, + samples_per_gpu=3, + workers_per_gpu=2, + shuffle=False, + dist=False) + assert dataloader.batch_size == samples_per_gpu + assert len(dataloader) == int(math.ceil(len(dataset) / samples_per_gpu)) + assert isinstance(dataloader.sampler, SequentialSampler) + assert dataloader.num_workers == 2 + + # dist=False, shuffle=True, 8GPU + dataloader = build_dataloader( + dataset, samples_per_gpu=3, workers_per_gpu=2, num_gpus=8, dist=False) + assert dataloader.batch_size == samples_per_gpu * 8 + assert len(dataloader) == int( + math.ceil(len(dataset) / samples_per_gpu / 8)) + assert isinstance(dataloader.sampler, RandomSampler) + assert dataloader.num_workers == 16 diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_data/test_loading.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_data/test_loading.py new file mode 100644 index 0000000..fdda93e --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_data/test_loading.py @@ -0,0 +1,199 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
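+# NOTE (editor's sketch, not part of the upstream test file): the loading
+# pipeline is driven by a plain `results` dict. LoadImageFromFile reads
+# `img_prefix` + `img_info['filename']` and fills in `img`, `img_shape`,
+# `ori_shape`, `pad_shape`, `scale_factor` and `img_norm_cfg`, e.g.:
+#
+#     results = dict(img_prefix='tests/data',
+#                    img_info=dict(filename='color.jpg'))
+#     results = LoadImageFromFile()(results)
+#     assert results['img_shape'] == results['ori_shape']
+#
+# The tests below pin down exactly this contract.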
+import copy +import os.path as osp +import tempfile + +import mmcv +import numpy as np + +from mmseg.datasets.pipelines import LoadAnnotations, LoadImageFromFile + + +class TestLoading(object): + + @classmethod + def setup_class(cls): + cls.data_prefix = osp.join(osp.dirname(__file__), '../data') + + def test_load_img(self): + results = dict( + img_prefix=self.data_prefix, img_info=dict(filename='color.jpg')) + transform = LoadImageFromFile() + results = transform(copy.deepcopy(results)) + assert results['filename'] == osp.join(self.data_prefix, 'color.jpg') + assert results['ori_filename'] == 'color.jpg' + assert results['img'].shape == (288, 512, 3) + assert results['img'].dtype == np.uint8 + assert results['img_shape'] == (288, 512, 3) + assert results['ori_shape'] == (288, 512, 3) + assert results['pad_shape'] == (288, 512, 3) + assert results['scale_factor'] == 1.0 + np.testing.assert_equal(results['img_norm_cfg']['mean'], + np.zeros(3, dtype=np.float32)) + assert repr(transform) == transform.__class__.__name__ + \ + "(to_float32=False,color_type='color',imdecode_backend='cv2')" + + # no img_prefix + results = dict( + img_prefix=None, img_info=dict(filename='tests/data/color.jpg')) + transform = LoadImageFromFile() + results = transform(copy.deepcopy(results)) + assert results['filename'] == 'tests/data/color.jpg' + assert results['ori_filename'] == 'tests/data/color.jpg' + assert results['img'].shape == (288, 512, 3) + + # to_float32 + transform = LoadImageFromFile(to_float32=True) + results = transform(copy.deepcopy(results)) + assert results['img'].dtype == np.float32 + + # gray image + results = dict( + img_prefix=self.data_prefix, img_info=dict(filename='gray.jpg')) + transform = LoadImageFromFile() + results = transform(copy.deepcopy(results)) + assert results['img'].shape == (288, 512, 3) + assert results['img'].dtype == np.uint8 + + transform = LoadImageFromFile(color_type='unchanged') + results = transform(copy.deepcopy(results)) + assert results['img'].shape == (288, 512) + assert results['img'].dtype == np.uint8 + np.testing.assert_equal(results['img_norm_cfg']['mean'], + np.zeros(1, dtype=np.float32)) + + def test_load_seg(self): + results = dict( + seg_prefix=self.data_prefix, + ann_info=dict(seg_map='seg.png'), + seg_fields=[]) + transform = LoadAnnotations() + results = transform(copy.deepcopy(results)) + assert results['seg_fields'] == ['gt_semantic_seg'] + assert results['gt_semantic_seg'].shape == (288, 512) + assert results['gt_semantic_seg'].dtype == np.uint8 + assert repr(transform) == transform.__class__.__name__ + \ + "(reduce_zero_label=False,imdecode_backend='pillow')" + + # no img_prefix + results = dict( + seg_prefix=None, + ann_info=dict(seg_map='tests/data/seg.png'), + seg_fields=[]) + transform = LoadAnnotations() + results = transform(copy.deepcopy(results)) + assert results['gt_semantic_seg'].shape == (288, 512) + assert results['gt_semantic_seg'].dtype == np.uint8 + + # reduce_zero_label + transform = LoadAnnotations(reduce_zero_label=True) + results = transform(copy.deepcopy(results)) + assert results['gt_semantic_seg'].shape == (288, 512) + assert results['gt_semantic_seg'].dtype == np.uint8 + + # mmcv backend + results = dict( + seg_prefix=self.data_prefix, + ann_info=dict(seg_map='seg.png'), + seg_fields=[]) + transform = LoadAnnotations(imdecode_backend='pillow') + results = transform(copy.deepcopy(results)) + # this image is saved by PIL + assert results['gt_semantic_seg'].shape == (288, 512) + assert results['gt_semantic_seg'].dtype == np.uint8 
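+        # NOTE (editor's note): reduce_zero_label=True is only smoke-tested
+        # above. In mmseg's LoadAnnotations it treats label 0 as background,
+        # mapping it to the ignore index 255 and shifting the remaining ids
+        # down by one, so only values change -- shape and dtype stay put.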
+ + def test_load_seg_custom_classes(self): + + test_img = np.random.rand(10, 10) + test_gt = np.zeros_like(test_img) + test_gt[2:4, 2:4] = 1 + test_gt[2:4, 6:8] = 2 + test_gt[6:8, 2:4] = 3 + test_gt[6:8, 6:8] = 4 + + tmp_dir = tempfile.TemporaryDirectory() + img_path = osp.join(tmp_dir.name, 'img.jpg') + gt_path = osp.join(tmp_dir.name, 'gt.png') + + mmcv.imwrite(test_img, img_path) + mmcv.imwrite(test_gt, gt_path) + + # test only train with label with id 3 + results = dict( + img_info=dict(filename=img_path), + ann_info=dict(seg_map=gt_path), + label_map={ + 0: 0, + 1: 0, + 2: 0, + 3: 1, + 4: 0 + }, + seg_fields=[]) + + load_imgs = LoadImageFromFile() + results = load_imgs(copy.deepcopy(results)) + + load_anns = LoadAnnotations() + results = load_anns(copy.deepcopy(results)) + + gt_array = results['gt_semantic_seg'] + + true_mask = np.zeros_like(gt_array) + true_mask[6:8, 2:4] = 1 + + assert results['seg_fields'] == ['gt_semantic_seg'] + assert gt_array.shape == (10, 10) + assert gt_array.dtype == np.uint8 + np.testing.assert_array_equal(gt_array, true_mask) + + # test only train with label with id 4 and 3 + results = dict( + img_info=dict(filename=img_path), + ann_info=dict(seg_map=gt_path), + label_map={ + 0: 0, + 1: 0, + 2: 0, + 3: 2, + 4: 1 + }, + seg_fields=[]) + + load_imgs = LoadImageFromFile() + results = load_imgs(copy.deepcopy(results)) + + load_anns = LoadAnnotations() + results = load_anns(copy.deepcopy(results)) + + gt_array = results['gt_semantic_seg'] + + true_mask = np.zeros_like(gt_array) + true_mask[6:8, 2:4] = 2 + true_mask[6:8, 6:8] = 1 + + assert results['seg_fields'] == ['gt_semantic_seg'] + assert gt_array.shape == (10, 10) + assert gt_array.dtype == np.uint8 + np.testing.assert_array_equal(gt_array, true_mask) + + # test no custom classes + results = dict( + img_info=dict(filename=img_path), + ann_info=dict(seg_map=gt_path), + seg_fields=[]) + + load_imgs = LoadImageFromFile() + results = load_imgs(copy.deepcopy(results)) + + load_anns = LoadAnnotations() + results = load_anns(copy.deepcopy(results)) + + gt_array = results['gt_semantic_seg'] + + assert results['seg_fields'] == ['gt_semantic_seg'] + assert gt_array.shape == (10, 10) + assert gt_array.dtype == np.uint8 + np.testing.assert_array_equal(gt_array, test_gt) + + tmp_dir.cleanup() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_data/test_transform.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_data/test_transform.py new file mode 100644 index 0000000..fcc46e7 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_data/test_transform.py @@ -0,0 +1,690 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
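+# NOTE (editor's note): every transform below is instantiated from a config
+# dict via the PIPELINES registry rather than constructed directly, e.g.:
+#
+#     cfg = dict(type='Resize', img_scale=(1333, 800), keep_ratio=True)
+#     resize = build_from_cfg(cfg, PIPELINES)
+#     results = resize(results)
+#
+# which is the same mechanism training configs use to assemble pipelines.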
+import copy +import os.path as osp + +import mmcv +import numpy as np +import pytest +from mmcv.utils import build_from_cfg +from PIL import Image + +from mmseg.datasets.builder import PIPELINES + + +def test_resize_to_multiple(): + transform = dict(type='ResizeToMultiple', size_divisor=32) + transform = build_from_cfg(transform, PIPELINES) + + img = np.random.randn(213, 232, 3) + seg = np.random.randint(0, 19, (213, 232)) + results = dict() + results['img'] = img + results['gt_semantic_seg'] = seg + results['seg_fields'] = ['gt_semantic_seg'] + results['img_shape'] = img.shape + results['pad_shape'] = img.shape + + results = transform(results) + assert results['img'].shape == (224, 256, 3) + assert results['gt_semantic_seg'].shape == (224, 256) + assert results['img_shape'] == (224, 256, 3) + assert results['pad_shape'] == (224, 256, 3) + + +def test_resize(): + # test assertion if img_scale is a list + with pytest.raises(AssertionError): + transform = dict(type='Resize', img_scale=[1333, 800], keep_ratio=True) + build_from_cfg(transform, PIPELINES) + + # test assertion if len(img_scale) while ratio_range is not None + with pytest.raises(AssertionError): + transform = dict( + type='Resize', + img_scale=[(1333, 800), (1333, 600)], + ratio_range=(0.9, 1.1), + keep_ratio=True) + build_from_cfg(transform, PIPELINES) + + # test assertion for invalid multiscale_mode + with pytest.raises(AssertionError): + transform = dict( + type='Resize', + img_scale=[(1333, 800), (1333, 600)], + keep_ratio=True, + multiscale_mode='2333') + build_from_cfg(transform, PIPELINES) + + transform = dict(type='Resize', img_scale=(1333, 800), keep_ratio=True) + resize_module = build_from_cfg(transform, PIPELINES) + + results = dict() + # (288, 512, 3) + img = mmcv.imread( + osp.join(osp.dirname(__file__), '../data/color.jpg'), 'color') + results['img'] = img + results['img_shape'] = img.shape + results['ori_shape'] = img.shape + # Set initial values for default meta_keys + results['pad_shape'] = img.shape + results['scale_factor'] = 1.0 + + resized_results = resize_module(results.copy()) + assert resized_results['img_shape'] == (750, 1333, 3) + + # test keep_ratio=False + transform = dict( + type='Resize', + img_scale=(1280, 800), + multiscale_mode='value', + keep_ratio=False) + resize_module = build_from_cfg(transform, PIPELINES) + resized_results = resize_module(results.copy()) + assert resized_results['img_shape'] == (800, 1280, 3) + + # test multiscale_mode='range' + transform = dict( + type='Resize', + img_scale=[(1333, 400), (1333, 1200)], + multiscale_mode='range', + keep_ratio=True) + resize_module = build_from_cfg(transform, PIPELINES) + resized_results = resize_module(results.copy()) + assert max(resized_results['img_shape'][:2]) <= 1333 + assert min(resized_results['img_shape'][:2]) >= 400 + assert min(resized_results['img_shape'][:2]) <= 1200 + + # test multiscale_mode='value' + transform = dict( + type='Resize', + img_scale=[(1333, 800), (1333, 400)], + multiscale_mode='value', + keep_ratio=True) + resize_module = build_from_cfg(transform, PIPELINES) + resized_results = resize_module(results.copy()) + assert resized_results['img_shape'] in [(750, 1333, 3), (400, 711, 3)] + + # test multiscale_mode='range' + transform = dict( + type='Resize', + img_scale=(1333, 800), + ratio_range=(0.9, 1.1), + keep_ratio=True) + resize_module = build_from_cfg(transform, PIPELINES) + resized_results = resize_module(results.copy()) + assert max(resized_results['img_shape'][:2]) <= 1333 * 1.1 + + # test img_scale=None and 
ratio_range is tuple. + # img shape: (288, 512, 3) + transform = dict( + type='Resize', img_scale=None, ratio_range=(0.5, 2.0), keep_ratio=True) + resize_module = build_from_cfg(transform, PIPELINES) + resized_results = resize_module(results.copy()) + assert int(288 * 0.5) <= resized_results['img_shape'][0] <= 288 * 2.0 + assert int(512 * 0.5) <= resized_results['img_shape'][1] <= 512 * 2.0 + + # test min_size=640 + transform = dict(type='Resize', img_scale=(2560, 640), min_size=640) + resize_module = build_from_cfg(transform, PIPELINES) + resized_results = resize_module(results.copy()) + assert resized_results['img_shape'] == (640, 1138, 3) + + # test min_size=640 and img_scale=(512, 640) + transform = dict(type='Resize', img_scale=(512, 640), min_size=640) + resize_module = build_from_cfg(transform, PIPELINES) + resized_results = resize_module(results.copy()) + assert resized_results['img_shape'] == (640, 1138, 3) + + # test h > w + img = np.random.randn(512, 288, 3) + results['img'] = img + results['img_shape'] = img.shape + results['ori_shape'] = img.shape + # Set initial values for default meta_keys + results['pad_shape'] = img.shape + results['scale_factor'] = 1.0 + transform = dict(type='Resize', img_scale=(2560, 640), min_size=640) + resize_module = build_from_cfg(transform, PIPELINES) + resized_results = resize_module(results.copy()) + assert resized_results['img_shape'] == (1138, 640, 3) + + +def test_flip(): + # test assertion for invalid prob + with pytest.raises(AssertionError): + transform = dict(type='RandomFlip', prob=1.5) + build_from_cfg(transform, PIPELINES) + + # test assertion for invalid direction + with pytest.raises(AssertionError): + transform = dict(type='RandomFlip', prob=1, direction='horizonta') + build_from_cfg(transform, PIPELINES) + + transform = dict(type='RandomFlip', prob=1) + flip_module = build_from_cfg(transform, PIPELINES) + + results = dict() + img = mmcv.imread( + osp.join(osp.dirname(__file__), '../data/color.jpg'), 'color') + original_img = copy.deepcopy(img) + seg = np.array( + Image.open(osp.join(osp.dirname(__file__), '../data/seg.png'))) + original_seg = copy.deepcopy(seg) + results['img'] = img + results['gt_semantic_seg'] = seg + results['seg_fields'] = ['gt_semantic_seg'] + results['img_shape'] = img.shape + results['ori_shape'] = img.shape + # Set initial values for default meta_keys + results['pad_shape'] = img.shape + results['scale_factor'] = 1.0 + + results = flip_module(results) + + flip_module = build_from_cfg(transform, PIPELINES) + results = flip_module(results) + assert np.equal(original_img, results['img']).all() + assert np.equal(original_seg, results['gt_semantic_seg']).all() + + +def test_random_crop(): + # test assertion for invalid random crop + with pytest.raises(AssertionError): + transform = dict(type='RandomCrop', crop_size=(-1, 0)) + build_from_cfg(transform, PIPELINES) + + results = dict() + img = mmcv.imread( + osp.join(osp.dirname(__file__), '../data/color.jpg'), 'color') + seg = np.array( + Image.open(osp.join(osp.dirname(__file__), '../data/seg.png'))) + results['img'] = img + results['gt_semantic_seg'] = seg + results['seg_fields'] = ['gt_semantic_seg'] + results['img_shape'] = img.shape + results['ori_shape'] = img.shape + # Set initial values for default meta_keys + results['pad_shape'] = img.shape + results['scale_factor'] = 1.0 + + h, w, _ = img.shape + transform = dict(type='RandomCrop', crop_size=(h - 20, w - 20)) + crop_module = build_from_cfg(transform, PIPELINES) + results = crop_module(results) + assert 
results['img'].shape[:2] == (h - 20, w - 20) + assert results['img_shape'][:2] == (h - 20, w - 20) + assert results['gt_semantic_seg'].shape[:2] == (h - 20, w - 20) + + +def test_pad(): + # test assertion if both size_divisor and size is None + with pytest.raises(AssertionError): + transform = dict(type='Pad') + build_from_cfg(transform, PIPELINES) + + transform = dict(type='Pad', size_divisor=32) + transform = build_from_cfg(transform, PIPELINES) + results = dict() + img = mmcv.imread( + osp.join(osp.dirname(__file__), '../data/color.jpg'), 'color') + original_img = copy.deepcopy(img) + results['img'] = img + results['img_shape'] = img.shape + results['ori_shape'] = img.shape + # Set initial values for default meta_keys + results['pad_shape'] = img.shape + results['scale_factor'] = 1.0 + + results = transform(results) + # original img already divisible by 32 + assert np.equal(results['img'], original_img).all() + img_shape = results['img'].shape + assert img_shape[0] % 32 == 0 + assert img_shape[1] % 32 == 0 + + resize_transform = dict( + type='Resize', img_scale=(1333, 800), keep_ratio=True) + resize_module = build_from_cfg(resize_transform, PIPELINES) + results = resize_module(results) + results = transform(results) + img_shape = results['img'].shape + assert img_shape[0] % 32 == 0 + assert img_shape[1] % 32 == 0 + + +def test_rotate(): + # test assertion degree should be tuple[float] or float + with pytest.raises(AssertionError): + transform = dict(type='RandomRotate', prob=0.5, degree=-10) + build_from_cfg(transform, PIPELINES) + # test assertion degree should be tuple[float] or float + with pytest.raises(AssertionError): + transform = dict(type='RandomRotate', prob=0.5, degree=(10., 20., 30.)) + build_from_cfg(transform, PIPELINES) + + transform = dict(type='RandomRotate', degree=10., prob=1.) 
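+    # A scalar degree d is expanded to the symmetric range (-d, d); the repr
+    # assertion below pins this down, along with the defaults for pad_val,
+    # seg_pad_val, center and auto_bound.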
+ transform = build_from_cfg(transform, PIPELINES) + + assert str(transform) == f'RandomRotate(' \ + f'prob={1.}, ' \ + f'degree=({-10.}, {10.}), ' \ + f'pad_val={0}, ' \ + f'seg_pad_val={255}, ' \ + f'center={None}, ' \ + f'auto_bound={False})' + + results = dict() + img = mmcv.imread( + osp.join(osp.dirname(__file__), '../data/color.jpg'), 'color') + h, w, _ = img.shape + seg = np.array( + Image.open(osp.join(osp.dirname(__file__), '../data/seg.png'))) + results['img'] = img + results['gt_semantic_seg'] = seg + results['seg_fields'] = ['gt_semantic_seg'] + results['img_shape'] = img.shape + results['ori_shape'] = img.shape + # Set initial values for default meta_keys + results['pad_shape'] = img.shape + results['scale_factor'] = 1.0 + + results = transform(results) + assert results['img'].shape[:2] == (h, w) + assert results['gt_semantic_seg'].shape[:2] == (h, w) + + +def test_normalize(): + img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + to_rgb=True) + transform = dict(type='Normalize', **img_norm_cfg) + transform = build_from_cfg(transform, PIPELINES) + results = dict() + img = mmcv.imread( + osp.join(osp.dirname(__file__), '../data/color.jpg'), 'color') + original_img = copy.deepcopy(img) + results['img'] = img + results['img_shape'] = img.shape + results['ori_shape'] = img.shape + # Set initial values for default meta_keys + results['pad_shape'] = img.shape + results['scale_factor'] = 1.0 + + results = transform(results) + + mean = np.array(img_norm_cfg['mean']) + std = np.array(img_norm_cfg['std']) + converted_img = (original_img[..., ::-1] - mean) / std + assert np.allclose(results['img'], converted_img) + + +def test_rgb2gray(): + # test assertion out_channels should be greater than 0 + with pytest.raises(AssertionError): + transform = dict(type='RGB2Gray', out_channels=-1) + build_from_cfg(transform, PIPELINES) + # test assertion weights should be tuple[float] + with pytest.raises(AssertionError): + transform = dict(type='RGB2Gray', out_channels=1, weights=1.1) + build_from_cfg(transform, PIPELINES) + + # test out_channels is None + transform = dict(type='RGB2Gray') + transform = build_from_cfg(transform, PIPELINES) + + assert str(transform) == f'RGB2Gray(' \ + f'out_channels={None}, ' \ + f'weights={(0.299, 0.587, 0.114)})' + + results = dict() + img = mmcv.imread( + osp.join(osp.dirname(__file__), '../data/color.jpg'), 'color') + h, w, c = img.shape + seg = np.array( + Image.open(osp.join(osp.dirname(__file__), '../data/seg.png'))) + results['img'] = img + results['gt_semantic_seg'] = seg + results['seg_fields'] = ['gt_semantic_seg'] + results['img_shape'] = img.shape + results['ori_shape'] = img.shape + # Set initial values for default meta_keys + results['pad_shape'] = img.shape + results['scale_factor'] = 1.0 + + results = transform(results) + assert results['img'].shape == (h, w, c) + assert results['img_shape'] == (h, w, c) + assert results['ori_shape'] == (h, w, c) + + # test out_channels = 2 + transform = dict(type='RGB2Gray', out_channels=2) + transform = build_from_cfg(transform, PIPELINES) + + assert str(transform) == f'RGB2Gray(' \ + f'out_channels={2}, ' \ + f'weights={(0.299, 0.587, 0.114)})' + + results = dict() + img = mmcv.imread( + osp.join(osp.dirname(__file__), '../data/color.jpg'), 'color') + h, w, c = img.shape + seg = np.array( + Image.open(osp.join(osp.dirname(__file__), '../data/seg.png'))) + results['img'] = img + results['gt_semantic_seg'] = seg + results['seg_fields'] = ['gt_semantic_seg'] + results['img_shape'] 
= img.shape + results['ori_shape'] = img.shape + # Set initial values for default meta_keys + results['pad_shape'] = img.shape + results['scale_factor'] = 1.0 + + results = transform(results) + assert results['img'].shape == (h, w, 2) + assert results['img_shape'] == (h, w, 2) + assert results['ori_shape'] == (h, w, c) + + +def test_adjust_gamma(): + # test assertion if gamma <= 0 + with pytest.raises(AssertionError): + transform = dict(type='AdjustGamma', gamma=0) + build_from_cfg(transform, PIPELINES) + + # test assertion if gamma is list + with pytest.raises(AssertionError): + transform = dict(type='AdjustGamma', gamma=[1.2]) + build_from_cfg(transform, PIPELINES) + + # test with gamma = 1.2 + transform = dict(type='AdjustGamma', gamma=1.2) + transform = build_from_cfg(transform, PIPELINES) + results = dict() + img = mmcv.imread( + osp.join(osp.dirname(__file__), '../data/color.jpg'), 'color') + original_img = copy.deepcopy(img) + results['img'] = img + results['img_shape'] = img.shape + results['ori_shape'] = img.shape + # Set initial values for default meta_keys + results['pad_shape'] = img.shape + results['scale_factor'] = 1.0 + + results = transform(results) + + inv_gamma = 1.0 / 1.2 + table = np.array([((i / 255.0)**inv_gamma) * 255 + for i in np.arange(0, 256)]).astype('uint8') + converted_img = mmcv.lut_transform( + np.array(original_img, dtype=np.uint8), table) + assert np.allclose(results['img'], converted_img) + assert str(transform) == f'AdjustGamma(gamma={1.2})' + + +def test_rerange(): + # test assertion if min_value or max_value is illegal + with pytest.raises(AssertionError): + transform = dict(type='Rerange', min_value=[0], max_value=[255]) + build_from_cfg(transform, PIPELINES) + + # test assertion if min_value >= max_value + with pytest.raises(AssertionError): + transform = dict(type='Rerange', min_value=1, max_value=1) + build_from_cfg(transform, PIPELINES) + + # test assertion if img_min_value == img_max_value + with pytest.raises(AssertionError): + transform = dict(type='Rerange', min_value=0, max_value=1) + transform = build_from_cfg(transform, PIPELINES) + results = dict() + results['img'] = np.array([[1, 1], [1, 1]]) + transform(results) + + img_rerange_cfg = dict() + transform = dict(type='Rerange', **img_rerange_cfg) + transform = build_from_cfg(transform, PIPELINES) + results = dict() + img = mmcv.imread( + osp.join(osp.dirname(__file__), '../data/color.jpg'), 'color') + original_img = copy.deepcopy(img) + results['img'] = img + results['img_shape'] = img.shape + results['ori_shape'] = img.shape + # Set initial values for default meta_keys + results['pad_shape'] = img.shape + results['scale_factor'] = 1.0 + + results = transform(results) + + min_value = np.min(original_img) + max_value = np.max(original_img) + converted_img = (original_img - min_value) / (max_value - min_value) * 255 + + assert np.allclose(results['img'], converted_img) + assert str(transform) == f'Rerange(min_value={0}, max_value={255})' + + +def test_CLAHE(): + # test assertion if clip_limit is None + with pytest.raises(AssertionError): + transform = dict(type='CLAHE', clip_limit=None) + build_from_cfg(transform, PIPELINES) + + # test assertion if tile_grid_size is illegal + with pytest.raises(AssertionError): + transform = dict(type='CLAHE', tile_grid_size=(8.0, 8.0)) + build_from_cfg(transform, PIPELINES) + + # test assertion if tile_grid_size is illegal + with pytest.raises(AssertionError): + transform = dict(type='CLAHE', tile_grid_size=(9, 9, 9)) + build_from_cfg(transform, PIPELINES) + 
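+    # clip_limit=2 with the default tile_grid_size=(8, 8); both values are
+    # pinned by the repr assertion and the per-channel mmcv.clahe comparison
+    # at the end of this test.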
+ transform = dict(type='CLAHE', clip_limit=2) + transform = build_from_cfg(transform, PIPELINES) + results = dict() + img = mmcv.imread( + osp.join(osp.dirname(__file__), '../data/color.jpg'), 'color') + original_img = copy.deepcopy(img) + results['img'] = img + results['img_shape'] = img.shape + results['ori_shape'] = img.shape + # Set initial values for default meta_keys + results['pad_shape'] = img.shape + results['scale_factor'] = 1.0 + + results = transform(results) + + converted_img = np.empty(original_img.shape) + for i in range(original_img.shape[2]): + converted_img[:, :, i] = mmcv.clahe( + np.array(original_img[:, :, i], dtype=np.uint8), 2, (8, 8)) + + assert np.allclose(results['img'], converted_img) + assert str(transform) == f'CLAHE(clip_limit={2}, tile_grid_size={(8, 8)})' + + +def test_seg_rescale(): + results = dict() + seg = np.array( + Image.open(osp.join(osp.dirname(__file__), '../data/seg.png'))) + results['gt_semantic_seg'] = seg + results['seg_fields'] = ['gt_semantic_seg'] + h, w = seg.shape + + transform = dict(type='SegRescale', scale_factor=1. / 2) + rescale_module = build_from_cfg(transform, PIPELINES) + rescale_results = rescale_module(results.copy()) + assert rescale_results['gt_semantic_seg'].shape == (h // 2, w // 2) + + transform = dict(type='SegRescale', scale_factor=1) + rescale_module = build_from_cfg(transform, PIPELINES) + rescale_results = rescale_module(results.copy()) + assert rescale_results['gt_semantic_seg'].shape == (h, w) + + +def test_cutout(): + # test prob + with pytest.raises(AssertionError): + transform = dict(type='RandomCutOut', prob=1.5, n_holes=1) + build_from_cfg(transform, PIPELINES) + # test n_holes + with pytest.raises(AssertionError): + transform = dict( + type='RandomCutOut', prob=0.5, n_holes=(5, 3), cutout_shape=(8, 8)) + build_from_cfg(transform, PIPELINES) + with pytest.raises(AssertionError): + transform = dict( + type='RandomCutOut', + prob=0.5, + n_holes=(3, 4, 5), + cutout_shape=(8, 8)) + build_from_cfg(transform, PIPELINES) + # test cutout_shape and cutout_ratio + with pytest.raises(AssertionError): + transform = dict( + type='RandomCutOut', prob=0.5, n_holes=1, cutout_shape=8) + build_from_cfg(transform, PIPELINES) + with pytest.raises(AssertionError): + transform = dict( + type='RandomCutOut', prob=0.5, n_holes=1, cutout_ratio=0.2) + build_from_cfg(transform, PIPELINES) + # either of cutout_shape and cutout_ratio should be given + with pytest.raises(AssertionError): + transform = dict(type='RandomCutOut', prob=0.5, n_holes=1) + build_from_cfg(transform, PIPELINES) + with pytest.raises(AssertionError): + transform = dict( + type='RandomCutOut', + prob=0.5, + n_holes=1, + cutout_shape=(2, 2), + cutout_ratio=(0.4, 0.4)) + build_from_cfg(transform, PIPELINES) + # test seg_fill_in + with pytest.raises(AssertionError): + transform = dict( + type='RandomCutOut', + prob=0.5, + n_holes=1, + cutout_shape=(8, 8), + seg_fill_in='a') + build_from_cfg(transform, PIPELINES) + with pytest.raises(AssertionError): + transform = dict( + type='RandomCutOut', + prob=0.5, + n_holes=1, + cutout_shape=(8, 8), + seg_fill_in=256) + build_from_cfg(transform, PIPELINES) + + results = dict() + img = mmcv.imread( + osp.join(osp.dirname(__file__), '../data/color.jpg'), 'color') + + seg = np.array( + Image.open(osp.join(osp.dirname(__file__), '../data/seg.png'))) + + results['img'] = img + results['gt_semantic_seg'] = seg + results['seg_fields'] = ['gt_semantic_seg'] + results['img_shape'] = img.shape + results['ori_shape'] = img.shape + 
results['pad_shape'] = img.shape + results['img_fields'] = ['img'] + + transform = dict( + type='RandomCutOut', prob=1, n_holes=1, cutout_shape=(10, 10)) + cutout_module = build_from_cfg(transform, PIPELINES) + assert 'cutout_shape' in repr(cutout_module) + cutout_result = cutout_module(copy.deepcopy(results)) + assert cutout_result['img'].sum() < img.sum() + + transform = dict( + type='RandomCutOut', prob=1, n_holes=1, cutout_ratio=(0.8, 0.8)) + cutout_module = build_from_cfg(transform, PIPELINES) + assert 'cutout_ratio' in repr(cutout_module) + cutout_result = cutout_module(copy.deepcopy(results)) + assert cutout_result['img'].sum() < img.sum() + + transform = dict( + type='RandomCutOut', prob=0, n_holes=1, cutout_ratio=(0.8, 0.8)) + cutout_module = build_from_cfg(transform, PIPELINES) + cutout_result = cutout_module(copy.deepcopy(results)) + assert cutout_result['img'].sum() == img.sum() + assert cutout_result['gt_semantic_seg'].sum() == seg.sum() + + transform = dict( + type='RandomCutOut', + prob=1, + n_holes=(2, 4), + cutout_shape=[(10, 10), (15, 15)], + fill_in=(255, 255, 255), + seg_fill_in=None) + cutout_module = build_from_cfg(transform, PIPELINES) + cutout_result = cutout_module(copy.deepcopy(results)) + assert cutout_result['img'].sum() > img.sum() + assert cutout_result['gt_semantic_seg'].sum() == seg.sum() + + transform = dict( + type='RandomCutOut', + prob=1, + n_holes=1, + cutout_ratio=(0.8, 0.8), + fill_in=(255, 255, 255), + seg_fill_in=255) + cutout_module = build_from_cfg(transform, PIPELINES) + cutout_result = cutout_module(copy.deepcopy(results)) + assert cutout_result['img'].sum() > img.sum() + assert cutout_result['gt_semantic_seg'].sum() > seg.sum() + + +def test_mosaic(): + # test prob + with pytest.raises(AssertionError): + transform = dict(type='RandomMosaic', prob=1.5) + build_from_cfg(transform, PIPELINES) + # test assertion for invalid img_scale + with pytest.raises(AssertionError): + transform = dict(type='RandomMosaic', prob=1, img_scale=640) + build_from_cfg(transform, PIPELINES) + + results = dict() + img = mmcv.imread( + osp.join(osp.dirname(__file__), '../data/color.jpg'), 'color') + seg = np.array( + Image.open(osp.join(osp.dirname(__file__), '../data/seg.png'))) + + results['img'] = img + results['gt_semantic_seg'] = seg + results['seg_fields'] = ['gt_semantic_seg'] + + transform = dict(type='RandomMosaic', prob=1, img_scale=(10, 12)) + mosaic_module = build_from_cfg(transform, PIPELINES) + assert 'Mosaic' in repr(mosaic_module) + + # test assertion for invalid mix_results + with pytest.raises(AssertionError): + mosaic_module(results) + + results['mix_results'] = [copy.deepcopy(results)] * 3 + results = mosaic_module(results) + assert results['img'].shape[:2] == (20, 24) + + results = dict() + results['img'] = img[:, :, 0] + results['gt_semantic_seg'] = seg + results['seg_fields'] = ['gt_semantic_seg'] + + transform = dict(type='RandomMosaic', prob=0, img_scale=(10, 12)) + mosaic_module = build_from_cfg(transform, PIPELINES) + results['mix_results'] = [copy.deepcopy(results)] * 3 + results = mosaic_module(results) + assert results['img'].shape[:2] == img.shape[:2] + + transform = dict(type='RandomMosaic', prob=1, img_scale=(10, 12)) + mosaic_module = build_from_cfg(transform, PIPELINES) + results = mosaic_module(results) + assert results['img'].shape[:2] == (20, 24) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_data/test_tta.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_data/test_tta.py new file mode 100644 
index 0000000..9373e2b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_data/test_tta.py @@ -0,0 +1,189 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp + +import mmcv +import pytest +from mmcv.utils import build_from_cfg + +from mmseg.datasets.builder import PIPELINES + + +def test_multi_scale_flip_aug(): + # test assertion if img_scale=None, img_ratios=1 (not float). + with pytest.raises(AssertionError): + tta_transform = dict( + type='MultiScaleFlipAug', + img_scale=None, + img_ratios=1, + transforms=[dict(type='Resize', keep_ratio=False)], + ) + build_from_cfg(tta_transform, PIPELINES) + + # test assertion if img_scale=None, img_ratios=None. + with pytest.raises(AssertionError): + tta_transform = dict( + type='MultiScaleFlipAug', + img_scale=None, + img_ratios=None, + transforms=[dict(type='Resize', keep_ratio=False)], + ) + build_from_cfg(tta_transform, PIPELINES) + + # test assertion if img_scale=(512, 512), img_ratios=1 (not float). + with pytest.raises(AssertionError): + tta_transform = dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + img_ratios=1, + transforms=[dict(type='Resize', keep_ratio=False)], + ) + build_from_cfg(tta_transform, PIPELINES) + + tta_transform = dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + img_ratios=[0.5, 1.0, 2.0], + flip=False, + transforms=[dict(type='Resize', keep_ratio=False)], + ) + tta_module = build_from_cfg(tta_transform, PIPELINES) + + results = dict() + # (288, 512, 3) + img = mmcv.imread( + osp.join(osp.dirname(__file__), '../data/color.jpg'), 'color') + results['img'] = img + results['img_shape'] = img.shape + results['ori_shape'] = img.shape + # Set initial values for default meta_keys + results['pad_shape'] = img.shape + results['scale_factor'] = 1.0 + + tta_results = tta_module(results.copy()) + assert tta_results['scale'] == [(256, 256), (512, 512), (1024, 1024)] + assert tta_results['flip'] == [False, False, False] + + tta_transform = dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + img_ratios=[0.5, 1.0, 2.0], + flip=True, + transforms=[dict(type='Resize', keep_ratio=False)], + ) + tta_module = build_from_cfg(tta_transform, PIPELINES) + tta_results = tta_module(results.copy()) + assert tta_results['scale'] == [(256, 256), (256, 256), (512, 512), + (512, 512), (1024, 1024), (1024, 1024)] + assert tta_results['flip'] == [False, True, False, True, False, True] + + tta_transform = dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + img_ratios=1.0, + flip=False, + transforms=[dict(type='Resize', keep_ratio=False)], + ) + tta_module = build_from_cfg(tta_transform, PIPELINES) + tta_results = tta_module(results.copy()) + assert tta_results['scale'] == [(512, 512)] + assert tta_results['flip'] == [False] + + tta_transform = dict( + type='MultiScaleFlipAug', + img_scale=(512, 512), + img_ratios=1.0, + flip=True, + transforms=[dict(type='Resize', keep_ratio=False)], + ) + tta_module = build_from_cfg(tta_transform, PIPELINES) + tta_results = tta_module(results.copy()) + assert tta_results['scale'] == [(512, 512), (512, 512)] + assert tta_results['flip'] == [False, True] + + tta_transform = dict( + type='MultiScaleFlipAug', + img_scale=None, + img_ratios=[0.5, 1.0, 2.0], + flip=False, + transforms=[dict(type='Resize', keep_ratio=False)], + ) + tta_module = build_from_cfg(tta_transform, PIPELINES) + tta_results = tta_module(results.copy()) + assert tta_results['scale'] == [(256, 144), (512, 288), (1024, 576)] + assert tta_results['flip'] == [False, False, 
False] + + tta_transform = dict( + type='MultiScaleFlipAug', + img_scale=None, + img_ratios=[0.5, 1.0, 2.0], + flip=True, + transforms=[dict(type='Resize', keep_ratio=False)], + ) + tta_module = build_from_cfg(tta_transform, PIPELINES) + tta_results = tta_module(results.copy()) + assert tta_results['scale'] == [(256, 144), (256, 144), (512, 288), + (512, 288), (1024, 576), (1024, 576)] + assert tta_results['flip'] == [False, True, False, True, False, True] + + tta_transform = dict( + type='MultiScaleFlipAug', + img_scale=[(256, 256), (512, 512), (1024, 1024)], + img_ratios=None, + flip=False, + transforms=[dict(type='Resize', keep_ratio=False)], + ) + tta_module = build_from_cfg(tta_transform, PIPELINES) + tta_results = tta_module(results.copy()) + assert tta_results['scale'] == [(256, 256), (512, 512), (1024, 1024)] + assert tta_results['flip'] == [False, False, False] + + tta_transform = dict( + type='MultiScaleFlipAug', + img_scale=[(256, 256), (512, 512), (1024, 1024)], + img_ratios=None, + flip=True, + transforms=[dict(type='Resize', keep_ratio=False)], + ) + tta_module = build_from_cfg(tta_transform, PIPELINES) + tta_results = tta_module(results.copy()) + assert tta_results['scale'] == [(256, 256), (256, 256), (512, 512), + (512, 512), (1024, 1024), (1024, 1024)] + assert tta_results['flip'] == [False, True, False, True, False, True] + + # test assertion if flip is True and Pad executed before RandomFlip + with pytest.raises(AssertionError): + tta_transform = dict( + type='MultiScaleFlipAug', + img_scale=[(256, 256), (512, 512), (1024, 1024)], + img_ratios=None, + flip=True, + transforms=[ + dict(type='Resize', keep_ratio=False), + dict(type='Pad', size_divisor=32), + dict(type='RandomFlip'), + ]) + tta_module = build_from_cfg(tta_transform, PIPELINES) + + tta_transform = dict( + type='MultiScaleFlipAug', + img_scale=[(256, 256), (512, 512), (1024, 1024)], + img_ratios=None, + flip=True, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Pad', size_divisor=32), + ]) + tta_module = build_from_cfg(tta_transform, PIPELINES) + tta_results = tta_module(results.copy()) + assert tta_results['scale'] == [(256, 256), (256, 256), (512, 512), + (512, 512), (1024, 1024), (1024, 1024)] + assert tta_results['flip'] == [False, True, False, True, False, True] + assert tta_results['img_shape'] == [(144, 256, 3), (144, 256, 3), + (288, 512, 3), (288, 512, 3), + (576, 1024, 3), (576, 1024, 3)] + assert tta_results['pad_shape'] == [(160, 256, 3), (160, 256, 3), + (288, 512, 3), (288, 512, 3), + (576, 1024, 3), (576, 1024, 3)] + for i in range(len(tta_results['img'])): + assert tta_results['img'][i].shape == tta_results['pad_shape'][i] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_digit_version.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_digit_version.py new file mode 100644 index 0000000..45daf09 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_digit_version.py @@ -0,0 +1,21 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
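+# NOTE (editor's note): digit_version maps a version string onto a 6-tuple
+# (major, minor, patch, extra, release-kind, release-number) so versions can
+# be compared as plain tuples; a pre-release is marked with -1, e.g.
+#
+#     digit_version('1.2.3rc1')  # -> (1, 2, 3, 0, -1, 1)
+#
+# and local build suffixes such as '+cuda90_cudnn7.6.3_lms' are ignored.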
+from mmseg import digit_version + + +def test_digit_version(): + assert digit_version('0.2.16') == (0, 2, 16, 0, 0, 0) + assert digit_version('1.2.3') == (1, 2, 3, 0, 0, 0) + assert digit_version('1.2.3rc0') == (1, 2, 3, 0, -1, 0) + assert digit_version('1.2.3rc1') == (1, 2, 3, 0, -1, 1) + assert digit_version('1.0rc0') == (1, 0, 0, 0, -1, 0) + assert digit_version('1.0') == digit_version('1.0.0') + assert digit_version('1.5.0+cuda90_cudnn7.6.3_lms') == digit_version('1.5') + assert digit_version('1.0.0dev') < digit_version('1.0.0a') + assert digit_version('1.0.0a') < digit_version('1.0.0a1') + assert digit_version('1.0.0a') < digit_version('1.0.0b') + assert digit_version('1.0.0b') < digit_version('1.0.0rc') + assert digit_version('1.0.0rc1') < digit_version('1.0.0') + assert digit_version('1.0.0') < digit_version('1.0.0post') + assert digit_version('1.0.0post') < digit_version('1.0.0post1') + assert digit_version('v1') == (1, 0, 0, 0, 0, 0) + assert digit_version('v1.1.5') == (1, 1, 5, 0, 0, 0) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_eval_hook.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_eval_hook.py new file mode 100644 index 0000000..5267438 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_eval_hook.py @@ -0,0 +1,204 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import logging +import tempfile +from unittest.mock import MagicMock, patch + +import mmcv.runner +import pytest +import torch +import torch.nn as nn +from mmcv.runner import obj_from_dict +from torch.utils.data import DataLoader, Dataset + +from mmseg.apis import single_gpu_test +from mmseg.core import DistEvalHook, EvalHook + + +class ExampleDataset(Dataset): + + def __getitem__(self, idx): + results = dict(img=torch.tensor([1]), img_metas=dict()) + return results + + def __len__(self): + return 1 + + +class ExampleModel(nn.Module): + + def __init__(self): + super(ExampleModel, self).__init__() + self.test_cfg = None + self.conv = nn.Conv2d(3, 3, 3) + + def forward(self, img, img_metas, test_mode=False, **kwargs): + return img + + def train_step(self, data_batch, optimizer): + loss = self.forward(**data_batch) + return dict(loss=loss) + + +def test_iter_eval_hook(): + with pytest.raises(TypeError): + test_dataset = ExampleModel() + data_loader = [ + DataLoader( + test_dataset, + batch_size=1, + sampler=None, + num_worker=0, + shuffle=False) + ] + EvalHook(data_loader) + + test_dataset = ExampleDataset() + test_dataset.pre_eval = MagicMock(return_value=[torch.tensor([1])]) + test_dataset.evaluate = MagicMock(return_value=dict(test='success')) + loader = DataLoader(test_dataset, batch_size=1) + model = ExampleModel() + data_loader = DataLoader( + test_dataset, batch_size=1, sampler=None, num_workers=0, shuffle=False) + optim_cfg = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) + optimizer = obj_from_dict(optim_cfg, torch.optim, + dict(params=model.parameters())) + + # test EvalHook + with tempfile.TemporaryDirectory() as tmpdir: + eval_hook = EvalHook(data_loader, by_epoch=False, efficient_test=True) + runner = mmcv.runner.IterBasedRunner( + model=model, + optimizer=optimizer, + work_dir=tmpdir, + logger=logging.getLogger()) + runner.register_hook(eval_hook) + runner.run([loader], [('train', 1)], 1) + test_dataset.evaluate.assert_called_with([torch.tensor([1])], + logger=runner.logger) + + +def test_epoch_eval_hook(): + with pytest.raises(TypeError): + test_dataset = ExampleModel() + data_loader = [ + DataLoader( + test_dataset, 
+ batch_size=1, + sampler=None, + num_worker=0, + shuffle=False) + ] + EvalHook(data_loader, by_epoch=True) + + test_dataset = ExampleDataset() + test_dataset.pre_eval = MagicMock(return_value=[torch.tensor([1])]) + test_dataset.evaluate = MagicMock(return_value=dict(test='success')) + loader = DataLoader(test_dataset, batch_size=1) + model = ExampleModel() + data_loader = DataLoader( + test_dataset, batch_size=1, sampler=None, num_workers=0, shuffle=False) + optim_cfg = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) + optimizer = obj_from_dict(optim_cfg, torch.optim, + dict(params=model.parameters())) + + # test EvalHook with interval + with tempfile.TemporaryDirectory() as tmpdir: + eval_hook = EvalHook(data_loader, by_epoch=True, interval=2) + runner = mmcv.runner.EpochBasedRunner( + model=model, + optimizer=optimizer, + work_dir=tmpdir, + logger=logging.getLogger()) + runner.register_hook(eval_hook) + runner.run([loader], [('train', 1)], 2) + test_dataset.evaluate.assert_called_once_with([torch.tensor([1])], + logger=runner.logger) + + +def multi_gpu_test(model, + data_loader, + tmpdir=None, + gpu_collect=False, + pre_eval=False): + # Pre eval is set by default when training. + results = single_gpu_test(model, data_loader, pre_eval=True) + return results + + +@patch('mmseg.apis.multi_gpu_test', multi_gpu_test) +def test_dist_eval_hook(): + with pytest.raises(TypeError): + test_dataset = ExampleModel() + data_loader = [ + DataLoader( + test_dataset, + batch_size=1, + sampler=None, + num_worker=0, + shuffle=False) + ] + DistEvalHook(data_loader) + + test_dataset = ExampleDataset() + test_dataset.pre_eval = MagicMock(return_value=[torch.tensor([1])]) + test_dataset.evaluate = MagicMock(return_value=dict(test='success')) + loader = DataLoader(test_dataset, batch_size=1) + model = ExampleModel() + data_loader = DataLoader( + test_dataset, batch_size=1, sampler=None, num_workers=0, shuffle=False) + optim_cfg = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) + optimizer = obj_from_dict(optim_cfg, torch.optim, + dict(params=model.parameters())) + + # test DistEvalHook + with tempfile.TemporaryDirectory() as tmpdir: + eval_hook = DistEvalHook( + data_loader, by_epoch=False, efficient_test=True) + runner = mmcv.runner.IterBasedRunner( + model=model, + optimizer=optimizer, + work_dir=tmpdir, + logger=logging.getLogger()) + runner.register_hook(eval_hook) + runner.run([loader], [('train', 1)], 1) + test_dataset.evaluate.assert_called_with([torch.tensor([1])], + logger=runner.logger) + + +@patch('mmseg.apis.multi_gpu_test', multi_gpu_test) +def test_dist_eval_hook_epoch(): + with pytest.raises(TypeError): + test_dataset = ExampleModel() + data_loader = [ + DataLoader( + test_dataset, + batch_size=1, + sampler=None, + num_worker=0, + shuffle=False) + ] + DistEvalHook(data_loader) + + test_dataset = ExampleDataset() + test_dataset.pre_eval = MagicMock(return_value=[torch.tensor([1])]) + test_dataset.evaluate = MagicMock(return_value=dict(test='success')) + loader = DataLoader(test_dataset, batch_size=1) + model = ExampleModel() + data_loader = DataLoader( + test_dataset, batch_size=1, sampler=None, num_workers=0, shuffle=False) + optim_cfg = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) + optimizer = obj_from_dict(optim_cfg, torch.optim, + dict(params=model.parameters())) + + # test DistEvalHook + with tempfile.TemporaryDirectory() as tmpdir: + eval_hook = DistEvalHook(data_loader, by_epoch=True, interval=2) + runner = mmcv.runner.EpochBasedRunner( + 
model=model, + optimizer=optimizer, + work_dir=tmpdir, + logger=logging.getLogger()) + runner.register_hook(eval_hook) + runner.run([loader], [('train', 1)], 2) + test_dataset.evaluate.assert_called_with([torch.tensor([1])], + logger=runner.logger) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_inference.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_inference.py new file mode 100644 index 0000000..f71a7ea --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_inference.py @@ -0,0 +1,30 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp + +import mmcv + +from mmseg.apis import inference_segmentor, init_segmentor + + +def test_test_time_augmentation_on_cpu(): + config_file = 'configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py' + config = mmcv.Config.fromfile(config_file) + + # Remove pretrain model download for testing + config.model.pretrained = None + # Replace SyncBN with BN to inference on CPU + norm_cfg = dict(type='BN', requires_grad=True) + config.model.backbone.norm_cfg = norm_cfg + config.model.decode_head.norm_cfg = norm_cfg + config.model.auxiliary_head.norm_cfg = norm_cfg + + # Enable test time augmentation + config.data.test.pipeline[1].flip = True + + checkpoint_file = None + model = init_segmentor(config, checkpoint_file, device='cpu') + + img = mmcv.imread( + osp.join(osp.dirname(__file__), 'data/color.jpg'), 'color') + result = inference_segmentor(model, img) + assert result[0].shape == (288, 512) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_metrics.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_metrics.py new file mode 100644 index 0000000..310b320 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_metrics.py @@ -0,0 +1,353 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
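+# NOTE (editor's note): the legacy_* helpers below spell out the metric
+# definitions on a confusion matrix M (row = ground-truth class, column =
+# predicted class):
+#
+#     acc_i / recall_i = M_ii / row_i
+#     precision_i      = M_ii / col_i
+#     iou_i            = M_ii / (row_i + col_i - M_ii)
+#     dice_i           = 2 * M_ii / (row_i + col_i)
+#
+# eval_metrics/mean_* are expected to match these reference implementations.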
+import numpy as np
+
+from mmseg.core.evaluation import (eval_metrics, mean_dice, mean_fscore,
+                                   mean_iou)
+from mmseg.core.evaluation.metrics import f_score
+
+
+def get_confusion_matrix(pred_label, label, num_classes, ignore_index):
+    """Compute the confusion matrix.
+    Args:
+        pred_label (np.ndarray): 2D prediction map
+        label (np.ndarray): 2D label map
+        num_classes (int): number of categories
+        ignore_index (int): index ignored during evaluation
+    """
+
+    mask = (label != ignore_index)
+    pred_label = pred_label[mask]
+    label = label[mask]
+
+    n = num_classes
+    inds = n * label + pred_label
+
+    mat = np.bincount(inds, minlength=n**2).reshape(n, n)
+
+    return mat
+
+
+# This func is deprecated since it's not memory efficient
+def legacy_mean_iou(results, gt_seg_maps, num_classes, ignore_index):
+    num_imgs = len(results)
+    assert len(gt_seg_maps) == num_imgs
+    # builtin float here: the np.float alias is gone in NumPy >= 1.24
+    total_mat = np.zeros((num_classes, num_classes), dtype=float)
+    for i in range(num_imgs):
+        mat = get_confusion_matrix(
+            results[i], gt_seg_maps[i], num_classes, ignore_index=ignore_index)
+        total_mat += mat
+    all_acc = np.diag(total_mat).sum() / total_mat.sum()
+    acc = np.diag(total_mat) / total_mat.sum(axis=1)
+    iou = np.diag(total_mat) / (
+        total_mat.sum(axis=1) + total_mat.sum(axis=0) - np.diag(total_mat))
+
+    return all_acc, acc, iou
+
+
+# This func is deprecated since it's not memory efficient
+def legacy_mean_dice(results, gt_seg_maps, num_classes, ignore_index):
+    num_imgs = len(results)
+    assert len(gt_seg_maps) == num_imgs
+    total_mat = np.zeros((num_classes, num_classes), dtype=float)
+    for i in range(num_imgs):
+        mat = get_confusion_matrix(
+            results[i], gt_seg_maps[i], num_classes, ignore_index=ignore_index)
+        total_mat += mat
+    all_acc = np.diag(total_mat).sum() / total_mat.sum()
+    acc = np.diag(total_mat) / total_mat.sum(axis=1)
+    dice = 2 * np.diag(total_mat) / (
+        total_mat.sum(axis=1) + total_mat.sum(axis=0))
+
+    return all_acc, acc, dice
+
+
+# This func is deprecated since it's not memory efficient
+def legacy_mean_fscore(results,
+                       gt_seg_maps,
+                       num_classes,
+                       ignore_index,
+                       beta=1):
+    num_imgs = len(results)
+    assert len(gt_seg_maps) == num_imgs
+    total_mat = np.zeros((num_classes, num_classes), dtype=float)
+    for i in range(num_imgs):
+        mat = get_confusion_matrix(
+            results[i], gt_seg_maps[i], num_classes, ignore_index=ignore_index)
+        total_mat += mat
+    all_acc = np.diag(total_mat).sum() / total_mat.sum()
+    recall = np.diag(total_mat) / total_mat.sum(axis=1)
+    precision = np.diag(total_mat) / total_mat.sum(axis=0)
+    fv = np.vectorize(f_score)
+    fscore = fv(precision, recall, beta=beta)
+
+    return all_acc, recall, precision, fscore
+
+
+def test_metrics():
+    pred_size = (10, 30, 30)
+    num_classes = 19
+    ignore_index = 255
+    results = np.random.randint(0, num_classes, size=pred_size)
+    label = np.random.randint(0, num_classes, size=pred_size)
+
+    # Test the availability of arg: ignore_index.
+    label[:, 2, 5:10] = ignore_index
+
+    # Test the correctness of the implementation of mIoU calculation.
+    ret_metrics = eval_metrics(
+        results, label, num_classes, ignore_index, metrics='mIoU')
+    all_acc, acc, iou = ret_metrics['aAcc'], ret_metrics['Acc'], ret_metrics[
+        'IoU']
+    all_acc_l, acc_l, iou_l = legacy_mean_iou(results, label, num_classes,
+                                              ignore_index)
+    assert all_acc == all_acc_l
+    assert np.allclose(acc, acc_l)
+    assert np.allclose(iou, iou_l)
+    # Test the correctness of the implementation of mDice calculation.
+    ret_metrics = eval_metrics(
+        results, label, num_classes, ignore_index, metrics='mDice')
+    all_acc, acc, dice = ret_metrics['aAcc'], ret_metrics['Acc'], ret_metrics[
+        'Dice']
+    all_acc_l, acc_l, dice_l = legacy_mean_dice(results, label, num_classes,
+                                                ignore_index)
+    assert all_acc == all_acc_l
+    assert np.allclose(acc, acc_l)
+    assert np.allclose(dice, dice_l)
+    # Test the correctness of the implementation of mFscore calculation.
+    ret_metrics = eval_metrics(
+        results, label, num_classes, ignore_index, metrics='mFscore')
+    all_acc, recall, precision, fscore = ret_metrics['aAcc'], ret_metrics[
+        'Recall'], ret_metrics['Precision'], ret_metrics['Fscore']
+    all_acc_l, recall_l, precision_l, fscore_l = legacy_mean_fscore(
+        results, label, num_classes, ignore_index)
+    assert all_acc == all_acc_l
+    assert np.allclose(recall, recall_l)
+    assert np.allclose(precision, precision_l)
+    assert np.allclose(fscore, fscore_l)
+    # Test the correctness of the implementation of joint calculation.
+    ret_metrics = eval_metrics(
+        results,
+        label,
+        num_classes,
+        ignore_index,
+        metrics=['mIoU', 'mDice', 'mFscore'])
+    all_acc, acc, iou, dice, precision, recall, fscore = ret_metrics[
+        'aAcc'], ret_metrics['Acc'], ret_metrics['IoU'], ret_metrics[
+            'Dice'], ret_metrics['Precision'], ret_metrics[
+                'Recall'], ret_metrics['Fscore']
+    assert all_acc == all_acc_l
+    assert np.allclose(acc, acc_l)
+    assert np.allclose(iou, iou_l)
+    assert np.allclose(dice, dice_l)
+    assert np.allclose(precision, precision_l)
+    assert np.allclose(recall, recall_l)
+    assert np.allclose(fscore, fscore_l)
+
+    # Test the correctness of calculation when arg: num_classes is larger
+    # than the maximum value of input maps.
+    results = np.random.randint(0, 5, size=pred_size)
+    label = np.random.randint(0, 4, size=pred_size)
+    ret_metrics = eval_metrics(
+        results,
+        label,
+        num_classes,
+        ignore_index=255,
+        metrics='mIoU',
+        nan_to_num=-1)
+    all_acc, acc, iou = ret_metrics['aAcc'], ret_metrics['Acc'], ret_metrics[
+        'IoU']
+    assert acc[-1] == -1
+    assert iou[-1] == -1
+
+    ret_metrics = eval_metrics(
+        results,
+        label,
+        num_classes,
+        ignore_index=255,
+        metrics='mDice',
+        nan_to_num=-1)
+    all_acc, acc, dice = ret_metrics['aAcc'], ret_metrics['Acc'], ret_metrics[
+        'Dice']
+    assert acc[-1] == -1
+    assert dice[-1] == -1
+
+    ret_metrics = eval_metrics(
+        results,
+        label,
+        num_classes,
+        ignore_index=255,
+        metrics='mFscore',
+        nan_to_num=-1)
+    all_acc, precision, recall, fscore = ret_metrics['aAcc'], ret_metrics[
+        'Precision'], ret_metrics['Recall'], ret_metrics['Fscore']
+    assert precision[-1] == -1
+    assert recall[-1] == -1
+    assert fscore[-1] == -1
+
+    ret_metrics = eval_metrics(
+        results,
+        label,
+        num_classes,
+        ignore_index=255,
+        metrics=['mDice', 'mIoU', 'mFscore'],
+        nan_to_num=-1)
+    all_acc, acc, iou, dice, precision, recall, fscore = ret_metrics[
+        'aAcc'], ret_metrics['Acc'], ret_metrics['IoU'], ret_metrics[
+            'Dice'], ret_metrics['Precision'], ret_metrics[
+                'Recall'], ret_metrics['Fscore']
+    assert acc[-1] == -1
+    assert dice[-1] == -1
+    assert iou[-1] == -1
+    assert precision[-1] == -1
+    assert recall[-1] == -1
+    assert fscore[-1] == -1
+
+    # Test the bug which is caused by torch.histc.
+    # torch.histc: https://pytorch.org/docs/stable/generated/torch.histc.html
+    # When the arg:bins is set to be same as arg:max,
+    # some channels of mIoU may be nan.
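+    # All predictions are class 31 while the labels cover 0-58, so every
+    # class appears in the label and no IoU entry should come out as NaN.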
+    results = np.array([np.repeat(31, 59)])
+    label = np.array([np.arange(59)])
+    num_classes = 59
+    ret_metrics = eval_metrics(
+        results, label, num_classes, ignore_index=255, metrics='mIoU')
+    all_acc, acc, iou = ret_metrics['aAcc'], ret_metrics['Acc'], ret_metrics[
+        'IoU']
+    assert not np.any(np.isnan(iou))
+
+
+def test_mean_iou():
+    pred_size = (10, 30, 30)
+    num_classes = 19
+    ignore_index = 255
+    results = np.random.randint(0, num_classes, size=pred_size)
+    label = np.random.randint(0, num_classes, size=pred_size)
+    label[:, 2, 5:10] = ignore_index
+    ret_metrics = mean_iou(results, label, num_classes, ignore_index)
+    all_acc, acc, iou = ret_metrics['aAcc'], ret_metrics['Acc'], ret_metrics[
+        'IoU']
+    all_acc_l, acc_l, iou_l = legacy_mean_iou(results, label, num_classes,
+                                              ignore_index)
+    assert all_acc == all_acc_l
+    assert np.allclose(acc, acc_l)
+    assert np.allclose(iou, iou_l)
+
+    results = np.random.randint(0, 5, size=pred_size)
+    label = np.random.randint(0, 4, size=pred_size)
+    ret_metrics = mean_iou(
+        results, label, num_classes, ignore_index=255, nan_to_num=-1)
+    all_acc, acc, iou = ret_metrics['aAcc'], ret_metrics['Acc'], ret_metrics[
+        'IoU']
+    assert acc[-1] == -1
+    assert iou[-1] == -1
+
+
+def test_mean_dice():
+    pred_size = (10, 30, 30)
+    num_classes = 19
+    ignore_index = 255
+    results = np.random.randint(0, num_classes, size=pred_size)
+    label = np.random.randint(0, num_classes, size=pred_size)
+    label[:, 2, 5:10] = ignore_index
+    ret_metrics = mean_dice(results, label, num_classes, ignore_index)
+    all_acc, acc, dice = ret_metrics['aAcc'], ret_metrics['Acc'], ret_metrics[
+        'Dice']
+    all_acc_l, acc_l, dice_l = legacy_mean_dice(results, label, num_classes,
+                                                ignore_index)
+    assert all_acc == all_acc_l
+    assert np.allclose(acc, acc_l)
+    assert np.allclose(dice, dice_l)
+
+    results = np.random.randint(0, 5, size=pred_size)
+    label = np.random.randint(0, 4, size=pred_size)
+    ret_metrics = mean_dice(
+        results, label, num_classes, ignore_index=255, nan_to_num=-1)
+    all_acc, acc, dice = ret_metrics['aAcc'], ret_metrics['Acc'], ret_metrics[
+        'Dice']
+    assert acc[-1] == -1
+    assert dice[-1] == -1
+
+
+def test_mean_fscore():
+    pred_size = (10, 30, 30)
+    num_classes = 19
+    ignore_index = 255
+    results = np.random.randint(0, num_classes, size=pred_size)
+    label = np.random.randint(0, num_classes, size=pred_size)
+    label[:, 2, 5:10] = ignore_index
+    ret_metrics = mean_fscore(results, label, num_classes, ignore_index)
+    all_acc, recall, precision, fscore = ret_metrics['aAcc'], ret_metrics[
+        'Recall'], ret_metrics['Precision'], ret_metrics['Fscore']
+    all_acc_l, recall_l, precision_l, fscore_l = legacy_mean_fscore(
+        results, label, num_classes, ignore_index)
+    assert all_acc == all_acc_l
+    assert np.allclose(recall, recall_l)
+    assert np.allclose(precision, precision_l)
+    assert np.allclose(fscore, fscore_l)
+
+    ret_metrics = mean_fscore(
+        results, label, num_classes, ignore_index, beta=2)
+    all_acc, recall, precision, fscore = ret_metrics['aAcc'], ret_metrics[
+        'Recall'], ret_metrics['Precision'], ret_metrics['Fscore']
+    all_acc_l, recall_l, precision_l, fscore_l = legacy_mean_fscore(
+        results, label, num_classes, ignore_index, beta=2)
+    assert all_acc == all_acc_l
+    assert np.allclose(recall, recall_l)
+    assert np.allclose(precision, precision_l)
+    assert np.allclose(fscore, fscore_l)
+
+    results = np.random.randint(0, 5, size=pred_size)
+    label = np.random.randint(0, 4, size=pred_size)
+    ret_metrics = mean_fscore(
+        results, label, num_classes, ignore_index=255,
nan_to_num=-1) + all_acc, recall, precision, fscore = ret_metrics['aAcc'], ret_metrics[ + 'Recall'], ret_metrics['Precision'], ret_metrics['Fscore'] + assert recall[-1] == -1 + assert precision[-1] == -1 + assert fscore[-1] == -1 + + +def test_filename_inputs(): + import tempfile + + import cv2 + + def save_arr(input_arrays: list, title: str, is_image: bool, dir: str): + filenames = [] + SUFFIX = '.png' if is_image else '.npy' + for idx, arr in enumerate(input_arrays): + filename = '{}/{}-{}{}'.format(dir, title, idx, SUFFIX) + if is_image: + cv2.imwrite(filename, arr) + else: + np.save(filename, arr) + filenames.append(filename) + return filenames + + pred_size = (10, 30, 30) + num_classes = 19 + ignore_index = 255 + results = np.random.randint(0, num_classes, size=pred_size) + labels = np.random.randint(0, num_classes, size=pred_size) + labels[:, 2, 5:10] = ignore_index + + with tempfile.TemporaryDirectory() as temp_dir: + + result_files = save_arr(results, 'pred', False, temp_dir) + label_files = save_arr(labels, 'label', True, temp_dir) + + ret_metrics = eval_metrics( + result_files, + label_files, + num_classes, + ignore_index, + metrics='mIoU') + all_acc, acc, iou = ret_metrics['aAcc'], ret_metrics[ + 'Acc'], ret_metrics['IoU'] + all_acc_l, acc_l, iou_l = legacy_mean_iou(results, labels, num_classes, + ignore_index) + assert all_acc == all_acc_l + assert np.allclose(acc, acc_l) + assert np.allclose(iou, iou_l) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/__init__.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/__init__.py new file mode 100644 index 0000000..ef101fe --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/__init__.py @@ -0,0 +1 @@ +# Copyright (c) OpenMMLab. All rights reserved. diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/__init__.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/__init__.py new file mode 100644 index 0000000..8b673fa --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .utils import all_zeros, check_norm_state, is_block, is_norm + +__all__ = ['is_norm', 'is_block', 'all_zeros', 'check_norm_state'] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_beit.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_beit.py new file mode 100644 index 0000000..cf39608 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_beit.py @@ -0,0 +1,182 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pytest +import torch + +from mmseg.models.backbones.beit import BEiT +from .utils import check_norm_state + + +def test_beit_backbone(): + with pytest.raises(TypeError): + # pretrained must be a string path + model = BEiT() + model.init_weights(pretrained=0) + + with pytest.raises(TypeError): + # img_size must be int or tuple + model = BEiT(img_size=512.0) + + with pytest.raises(TypeError): + # out_indices must be int ,list or tuple + model = BEiT(out_indices=1.) + + with pytest.raises(AssertionError): + # The length of img_size tuple must be lower than 3. + BEiT(img_size=(224, 224, 224)) + + with pytest.raises(TypeError): + # Pretrained must be None or Str. 
+ BEiT(pretrained=123) + + # Test img_size isinstance tuple + imgs = torch.randn(1, 3, 224, 224) + model = BEiT(img_size=(224, )) + model.init_weights() + model(imgs) + + # Test img_size isinstance tuple + imgs = torch.randn(1, 3, 224, 224) + model = BEiT(img_size=(224, 224)) + model(imgs) + + # Test norm_eval = True + model = BEiT(norm_eval=True) + model.train() + + # Test BEiT backbone with input size of 224 and patch size of 16 + model = BEiT() + model.init_weights() + model.train() + + # Test qv_bias + model = BEiT(qv_bias=False) + model.train() + + # Test out_indices = list + model = BEiT(out_indices=[2, 4, 8, 12]) + model.train() + + assert check_norm_state(model.modules(), True) + + # Test image size = (224, 224) + imgs = torch.randn(1, 3, 224, 224) + feat = model(imgs) + assert feat[-1].shape == (1, 768, 14, 14) + + # Test BEiT backbone with input size of 256 and patch size of 16 + model = BEiT(img_size=(256, 256)) + model.init_weights() + model.train() + imgs = torch.randn(1, 3, 256, 256) + feat = model(imgs) + assert feat[-1].shape == (1, 768, 16, 16) + + # Test BEiT backbone with input size of 32 and patch size of 16 + model = BEiT(img_size=(32, 32)) + model.init_weights() + model.train() + imgs = torch.randn(1, 3, 32, 32) + feat = model(imgs) + assert feat[-1].shape == (1, 768, 2, 2) + + # Test unbalanced size input image + model = BEiT(img_size=(112, 224)) + model.init_weights() + model.train() + imgs = torch.randn(1, 3, 112, 224) + feat = model(imgs) + assert feat[-1].shape == (1, 768, 7, 14) + + # Test irregular input image + model = BEiT(img_size=(234, 345)) + model.init_weights() + model.train() + imgs = torch.randn(1, 3, 234, 345) + feat = model(imgs) + assert feat[-1].shape == (1, 768, 14, 21) + + # Test init_values=0 + model = BEiT(init_values=0) + imgs = torch.randn(1, 3, 224, 224) + feat = model(imgs) + assert feat[-1].shape == (1, 768, 14, 14) + + # Test final norm + model = BEiT(final_norm=True) + imgs = torch.randn(1, 3, 224, 224) + feat = model(imgs) + assert feat[-1].shape == (1, 768, 14, 14) + + # Test patch norm + model = BEiT(patch_norm=True) + imgs = torch.randn(1, 3, 224, 224) + feat = model(imgs) + assert feat[-1].shape == (1, 768, 14, 14) + + +def test_beit_init(): + path = 'PATH_THAT_DO_NOT_EXIST' + # Test all combinations of pretrained and init_cfg + # pretrained=None, init_cfg=None + model = BEiT(pretrained=None, init_cfg=None) + assert model.init_cfg is None + model.init_weights() + + # pretrained=None + # init_cfg loads pretrain from an non-existent file + model = BEiT( + pretrained=None, init_cfg=dict(type='Pretrained', checkpoint=path)) + assert model.init_cfg == dict(type='Pretrained', checkpoint=path) + # Test loading a checkpoint from an non-existent file + with pytest.raises(OSError): + model.init_weights() + + # test resize_rel_pos_embed + value = torch.randn(732, 16) + ckpt = { + 'state_dict': { + 'layers.0.attn.relative_position_index': 0, + 'layers.0.attn.relative_position_bias_table': value + } + } + model = BEiT(img_size=(512, 512)) + with pytest.raises(AttributeError): + model.resize_rel_pos_embed(ckpt) + + # pretrained=None + # init_cfg=123, whose type is unsupported + model = BEiT(pretrained=None, init_cfg=123) + with pytest.raises(TypeError): + model.init_weights() + + # pretrained loads pretrain from an non-existent file + # init_cfg=None + model = BEiT(pretrained=path, init_cfg=None) + assert model.init_cfg == dict(type='Pretrained', checkpoint=path) + # Test loading a checkpoint from an non-existent file + with 
pytest.raises(OSError): + model.init_weights() + + # pretrained loads pretrain from an non-existent file + # init_cfg loads pretrain from an non-existent file + with pytest.raises(AssertionError): + model = BEiT( + pretrained=path, init_cfg=dict(type='Pretrained', checkpoint=path)) + with pytest.raises(AssertionError): + model = BEiT(pretrained=path, init_cfg=123) + + # pretrain=123, whose type is unsupported + # init_cfg=None + with pytest.raises(TypeError): + model = BEiT(pretrained=123, init_cfg=None) + + # pretrain=123, whose type is unsupported + # init_cfg loads pretrain from an non-existent file + with pytest.raises(AssertionError): + model = BEiT( + pretrained=123, init_cfg=dict(type='Pretrained', checkpoint=path)) + + # pretrain=123, whose type is unsupported + # init_cfg=123, whose type is unsupported + with pytest.raises(AssertionError): + model = BEiT(pretrained=123, init_cfg=123) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_bisenetv1.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_bisenetv1.py new file mode 100644 index 0000000..c067749 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_bisenetv1.py @@ -0,0 +1,109 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pytest +import torch + +from mmseg.models.backbones import BiSeNetV1 +from mmseg.models.backbones.bisenetv1 import (AttentionRefinementModule, + ContextPath, FeatureFusionModule, + SpatialPath) + + +def test_bisenetv1_backbone(): + # Test BiSeNetV1 Standard Forward + backbone_cfg = dict( + type='ResNet', + in_channels=3, + depth=18, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_eval=False, + style='pytorch', + contract_dilation=True) + model = BiSeNetV1(in_channels=3, backbone_cfg=backbone_cfg) + model.init_weights() + model.train() + batch_size = 2 + imgs = torch.randn(batch_size, 3, 64, 128) + feat = model(imgs) + + assert len(feat) == 3 + # output for segment Head + assert feat[0].shape == torch.Size([batch_size, 256, 8, 16]) + # for auxiliary head 1 + assert feat[1].shape == torch.Size([batch_size, 128, 8, 16]) + # for auxiliary head 2 + assert feat[2].shape == torch.Size([batch_size, 128, 4, 8]) + + # Test input with rare shape + batch_size = 2 + imgs = torch.randn(batch_size, 3, 95, 27) + feat = model(imgs) + assert len(feat) == 3 + + with pytest.raises(AssertionError): + # BiSeNetV1 spatial path channel constraints. + BiSeNetV1( + backbone_cfg=backbone_cfg, + in_channels=3, + spatial_channels=(16, 16, 16)) + + with pytest.raises(AssertionError): + # BiSeNetV1 context path constraints. + BiSeNetV1( + backbone_cfg=backbone_cfg, + in_channels=3, + context_channels=(16, 32, 64, 128)) + + +def test_bisenetv1_spatial_path(): + with pytest.raises(AssertionError): + # BiSeNetV1 spatial path channel constraints. + SpatialPath(num_channels=(16, 16, 16), in_channels=3) + + +def test_bisenetv1_context_path(): + backbone_cfg = dict( + type='ResNet', + in_channels=3, + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 1, 1), + strides=(1, 2, 2, 2), + norm_eval=False, + style='pytorch', + contract_dilation=True) + + with pytest.raises(AssertionError): + # BiSeNetV1 context path constraints. 
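+        # (ContextPath expects exactly three context_channels,
+        # e.g. the default (128, 256, 512))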
+ ContextPath( + backbone_cfg=backbone_cfg, context_channels=(16, 32, 64, 128)) + + +def test_bisenetv1_attention_refinement_module(): + x_arm = AttentionRefinementModule(32, 8) + assert x_arm.conv_layer.in_channels == 32 + assert x_arm.conv_layer.out_channels == 8 + assert x_arm.conv_layer.kernel_size == (3, 3) + x = torch.randn(2, 32, 8, 16) + x_out = x_arm(x) + assert x_out.shape == torch.Size([2, 8, 8, 16]) + + +def test_bisenetv1_feature_fusion_module(): + ffm = FeatureFusionModule(16, 32) + assert ffm.conv1.in_channels == 16 + assert ffm.conv1.out_channels == 32 + assert ffm.conv1.kernel_size == (1, 1) + assert ffm.gap.output_size == (1, 1) + assert ffm.conv_atten[0].in_channels == 32 + assert ffm.conv_atten[0].out_channels == 32 + assert ffm.conv_atten[0].kernel_size == (1, 1) + + ffm = FeatureFusionModule(16, 16) + x1 = torch.randn(2, 8, 8, 16) + x2 = torch.randn(2, 8, 8, 16) + x_out = ffm(x1, x2) + assert x_out.shape == torch.Size([2, 16, 8, 16]) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_bisenetv2.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_bisenetv2.py new file mode 100644 index 0000000..cf2dfb3 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_bisenetv2.py @@ -0,0 +1,57 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from mmcv.cnn import ConvModule + +from mmseg.models.backbones import BiSeNetV2 +from mmseg.models.backbones.bisenetv2 import (BGALayer, DetailBranch, + SemanticBranch) + + +def test_bisenetv2_backbone(): + # Test BiSeNetV2 Standard Forward + model = BiSeNetV2() + model.init_weights() + model.train() + batch_size = 2 + imgs = torch.randn(batch_size, 3, 128, 256) + feat = model(imgs) + + assert len(feat) == 5 + # output for segment Head + assert feat[0].shape == torch.Size([batch_size, 128, 16, 32]) + # for auxiliary head 1 + assert feat[1].shape == torch.Size([batch_size, 16, 32, 64]) + # for auxiliary head 2 + assert feat[2].shape == torch.Size([batch_size, 32, 16, 32]) + # for auxiliary head 3 + assert feat[3].shape == torch.Size([batch_size, 64, 8, 16]) + # for auxiliary head 4 + assert feat[4].shape == torch.Size([batch_size, 128, 4, 8]) + + # Test input with rare shape + batch_size = 2 + imgs = torch.randn(batch_size, 3, 95, 27) + feat = model(imgs) + assert len(feat) == 5 + + +def test_bisenetv2_DetailBranch(): + x = torch.randn(1, 3, 32, 64) + detail_branch = DetailBranch(detail_channels=(64, 16, 32)) + assert isinstance(detail_branch.detail_branch[0][0], ConvModule) + x_out = detail_branch(x) + assert x_out.shape == torch.Size([1, 32, 4, 8]) + + +def test_bisenetv2_SemanticBranch(): + semantic_branch = SemanticBranch(semantic_channels=(16, 32, 64, 128)) + assert semantic_branch.stage1.pool.stride == 2 + + +def test_bisenetv2_BGALayer(): + x_a = torch.randn(1, 8, 8, 16) + x_b = torch.randn(1, 8, 2, 4) + bga = BGALayer(out_channels=8) + assert isinstance(bga.conv, ConvModule) + x_out = bga(x_a, x_b) + assert x_out.shape == torch.Size([1, 8, 8, 16]) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_blocks.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_blocks.py new file mode 100644 index 0000000..77c8564 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_blocks.py @@ -0,0 +1,186 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
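+# Tests for shared backbone building blocks: make_divisible,
+# InvertedResidual, InvertedResidualV3 and SELayer.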
+import mmcv +import pytest +import torch +from mmcv.utils import TORCH_VERSION, digit_version + +from mmseg.models.utils import (InvertedResidual, InvertedResidualV3, SELayer, + make_divisible) + + +def test_make_divisible(): + # test with min_value = None + assert make_divisible(10, 4) == 12 + assert make_divisible(9, 4) == 12 + assert make_divisible(1, 4) == 4 + + # test with min_value = 8 + assert make_divisible(10, 4, 8) == 12 + assert make_divisible(9, 4, 8) == 12 + assert make_divisible(1, 4, 8) == 8 + + +def test_inv_residual(): + with pytest.raises(AssertionError): + # test stride assertion. + InvertedResidual(32, 32, 3, 4) + + # test default config with res connection. + # set expand_ratio = 4, stride = 1 and inp=oup. + inv_module = InvertedResidual(32, 32, 1, 4) + assert inv_module.use_res_connect + assert inv_module.conv[0].kernel_size == (1, 1) + assert inv_module.conv[0].padding == 0 + assert inv_module.conv[1].kernel_size == (3, 3) + assert inv_module.conv[1].padding == 1 + assert inv_module.conv[0].with_norm + assert inv_module.conv[1].with_norm + x = torch.rand(1, 32, 64, 64) + output = inv_module(x) + assert output.shape == (1, 32, 64, 64) + + # test inv_residual module without res connection. + # set expand_ratio = 4, stride = 2. + inv_module = InvertedResidual(32, 32, 2, 4) + assert not inv_module.use_res_connect + assert inv_module.conv[0].kernel_size == (1, 1) + x = torch.rand(1, 32, 64, 64) + output = inv_module(x) + assert output.shape == (1, 32, 32, 32) + + # test expand_ratio == 1 + inv_module = InvertedResidual(32, 32, 1, 1) + assert inv_module.conv[0].kernel_size == (3, 3) + x = torch.rand(1, 32, 64, 64) + output = inv_module(x) + assert output.shape == (1, 32, 64, 64) + + # test with checkpoint forward + inv_module = InvertedResidual(32, 32, 1, 1, with_cp=True) + assert inv_module.with_cp + x = torch.rand(1, 32, 64, 64, requires_grad=True) + output = inv_module(x) + assert output.shape == (1, 32, 64, 64) + + +def test_inv_residualv3(): + with pytest.raises(AssertionError): + # test stride assertion. + InvertedResidualV3(32, 32, 16, stride=3) + + with pytest.raises(AssertionError): + # test assertion. 
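+        # (with_expand_conv=False is only valid when
+        # in_channels == mid_channels)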
+ InvertedResidualV3(32, 32, 16, with_expand_conv=False) + + # test with se_cfg=None, with_expand_conv=False + inv_module = InvertedResidualV3(32, 32, 32, with_expand_conv=False) + + assert inv_module.with_res_shortcut is True + assert inv_module.with_se is False + assert inv_module.with_expand_conv is False + assert not hasattr(inv_module, 'expand_conv') + assert isinstance(inv_module.depthwise_conv.conv, torch.nn.Conv2d) + assert inv_module.depthwise_conv.conv.kernel_size == (3, 3) + assert inv_module.depthwise_conv.conv.stride == (1, 1) + assert inv_module.depthwise_conv.conv.padding == (1, 1) + assert isinstance(inv_module.depthwise_conv.bn, torch.nn.BatchNorm2d) + assert isinstance(inv_module.depthwise_conv.activate, torch.nn.ReLU) + assert inv_module.linear_conv.conv.kernel_size == (1, 1) + assert inv_module.linear_conv.conv.stride == (1, 1) + assert inv_module.linear_conv.conv.padding == (0, 0) + assert isinstance(inv_module.linear_conv.bn, torch.nn.BatchNorm2d) + + x = torch.rand(1, 32, 64, 64) + output = inv_module(x) + assert output.shape == (1, 32, 64, 64) + + # test with se_cfg and with_expand_conv + se_cfg = dict( + channels=16, + ratio=4, + act_cfg=(dict(type='ReLU'), + dict(type='HSigmoid', bias=3.0, divisor=6.0))) + act_cfg = dict(type='HSwish') + inv_module = InvertedResidualV3( + 32, 40, 16, 3, 2, se_cfg=se_cfg, act_cfg=act_cfg) + assert inv_module.with_res_shortcut is False + assert inv_module.with_se is True + assert inv_module.with_expand_conv is True + assert inv_module.expand_conv.conv.kernel_size == (1, 1) + assert inv_module.expand_conv.conv.stride == (1, 1) + assert inv_module.expand_conv.conv.padding == (0, 0) + + assert isinstance(inv_module.depthwise_conv.conv, + mmcv.cnn.bricks.Conv2dAdaptivePadding) + assert inv_module.depthwise_conv.conv.kernel_size == (3, 3) + assert inv_module.depthwise_conv.conv.stride == (2, 2) + assert inv_module.depthwise_conv.conv.padding == (0, 0) + assert isinstance(inv_module.depthwise_conv.bn, torch.nn.BatchNorm2d) + + assert inv_module.linear_conv.conv.kernel_size == (1, 1) + assert inv_module.linear_conv.conv.stride == (1, 1) + assert inv_module.linear_conv.conv.padding == (0, 0) + assert isinstance(inv_module.linear_conv.bn, torch.nn.BatchNorm2d) + + if (TORCH_VERSION == 'parrots' + or digit_version(TORCH_VERSION) < digit_version('1.7')): + # Note: Use PyTorch official HSwish + # when torch>=1.7 after MMCV >= 1.4.5. + # Hardswish is not supported when PyTorch version < 1.6. + # And Hardswish in PyTorch 1.6 does not support inplace. + # More details could be found from: + # https://github.com/open-mmlab/mmcv/pull/1709 + assert isinstance(inv_module.expand_conv.activate, mmcv.cnn.HSwish) + assert isinstance(inv_module.depthwise_conv.activate, mmcv.cnn.HSwish) + else: + assert isinstance(inv_module.expand_conv.activate, torch.nn.Hardswish) + assert isinstance(inv_module.depthwise_conv.activate, + torch.nn.Hardswish) + + x = torch.rand(1, 32, 64, 64) + output = inv_module(x) + assert output.shape == (1, 40, 32, 32) + + # test with checkpoint forward + inv_module = InvertedResidualV3( + 32, 40, 16, 3, 2, se_cfg=se_cfg, act_cfg=act_cfg, with_cp=True) + assert inv_module.with_cp + x = torch.randn(2, 32, 64, 64, requires_grad=True) + output = inv_module(x) + assert output.shape == (2, 40, 32, 32) + + +def test_se_layer(): + with pytest.raises(AssertionError): + # test act_cfg assertion. + SELayer(32, act_cfg=(dict(type='ReLU'), )) + + # test config with channels = 16. 
+ se_layer = SELayer(16) + assert se_layer.conv1.conv.kernel_size == (1, 1) + assert se_layer.conv1.conv.stride == (1, 1) + assert se_layer.conv1.conv.padding == (0, 0) + assert isinstance(se_layer.conv1.activate, torch.nn.ReLU) + assert se_layer.conv2.conv.kernel_size == (1, 1) + assert se_layer.conv2.conv.stride == (1, 1) + assert se_layer.conv2.conv.padding == (0, 0) + assert isinstance(se_layer.conv2.activate, mmcv.cnn.HSigmoid) + + x = torch.rand(1, 16, 64, 64) + output = se_layer(x) + assert output.shape == (1, 16, 64, 64) + + # test config with channels = 16, act_cfg = dict(type='ReLU'). + se_layer = SELayer(16, act_cfg=dict(type='ReLU')) + assert se_layer.conv1.conv.kernel_size == (1, 1) + assert se_layer.conv1.conv.stride == (1, 1) + assert se_layer.conv1.conv.padding == (0, 0) + assert isinstance(se_layer.conv1.activate, torch.nn.ReLU) + assert se_layer.conv2.conv.kernel_size == (1, 1) + assert se_layer.conv2.conv.stride == (1, 1) + assert se_layer.conv2.conv.padding == (0, 0) + assert isinstance(se_layer.conv2.activate, torch.nn.ReLU) + + x = torch.rand(1, 16, 64, 64) + output = se_layer(x) + assert output.shape == (1, 16, 64, 64) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_cgnet.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_cgnet.py new file mode 100644 index 0000000..f938525 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_cgnet.py @@ -0,0 +1,151 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pytest +import torch + +from mmseg.models.backbones import CGNet +from mmseg.models.backbones.cgnet import (ContextGuidedBlock, + GlobalContextExtractor) + + +def test_cgnet_GlobalContextExtractor(): + block = GlobalContextExtractor(16, 16, with_cp=True) + x = torch.randn(2, 16, 64, 64, requires_grad=True) + x_out = block(x) + assert x_out.shape == torch.Size([2, 16, 64, 64]) + + +def test_cgnet_context_guided_block(): + with pytest.raises(AssertionError): + # cgnet ContextGuidedBlock GlobalContextExtractor channel and reduction + # constraints. 
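+        # (out_channels // 2 = 4 is smaller than the default reduction of 16)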
+ ContextGuidedBlock(8, 8) + + # test cgnet ContextGuidedBlock with checkpoint forward + block = ContextGuidedBlock( + 16, 16, act_cfg=dict(type='PReLU'), with_cp=True) + assert block.with_cp + x = torch.randn(2, 16, 64, 64, requires_grad=True) + x_out = block(x) + assert x_out.shape == torch.Size([2, 16, 64, 64]) + + # test cgnet ContextGuidedBlock without checkpoint forward + block = ContextGuidedBlock(32, 32) + assert not block.with_cp + x = torch.randn(3, 32, 32, 32) + x_out = block(x) + assert x_out.shape == torch.Size([3, 32, 32, 32]) + + # test cgnet ContextGuidedBlock with down sampling + block = ContextGuidedBlock(32, 32, downsample=True) + assert block.conv1x1.conv.in_channels == 32 + assert block.conv1x1.conv.out_channels == 32 + assert block.conv1x1.conv.kernel_size == (3, 3) + assert block.conv1x1.conv.stride == (2, 2) + assert block.conv1x1.conv.padding == (1, 1) + + assert block.f_loc.in_channels == 32 + assert block.f_loc.out_channels == 32 + assert block.f_loc.kernel_size == (3, 3) + assert block.f_loc.stride == (1, 1) + assert block.f_loc.padding == (1, 1) + assert block.f_loc.groups == 32 + assert block.f_loc.dilation == (1, 1) + assert block.f_loc.bias is None + + assert block.f_sur.in_channels == 32 + assert block.f_sur.out_channels == 32 + assert block.f_sur.kernel_size == (3, 3) + assert block.f_sur.stride == (1, 1) + assert block.f_sur.padding == (2, 2) + assert block.f_sur.groups == 32 + assert block.f_sur.dilation == (2, 2) + assert block.f_sur.bias is None + + assert block.bottleneck.in_channels == 64 + assert block.bottleneck.out_channels == 32 + assert block.bottleneck.kernel_size == (1, 1) + assert block.bottleneck.stride == (1, 1) + assert block.bottleneck.bias is None + + x = torch.randn(1, 32, 32, 32) + x_out = block(x) + assert x_out.shape == torch.Size([1, 32, 16, 16]) + + # test cgnet ContextGuidedBlock without down sampling + block = ContextGuidedBlock(32, 32, downsample=False) + assert block.conv1x1.conv.in_channels == 32 + assert block.conv1x1.conv.out_channels == 16 + assert block.conv1x1.conv.kernel_size == (1, 1) + assert block.conv1x1.conv.stride == (1, 1) + assert block.conv1x1.conv.padding == (0, 0) + + assert block.f_loc.in_channels == 16 + assert block.f_loc.out_channels == 16 + assert block.f_loc.kernel_size == (3, 3) + assert block.f_loc.stride == (1, 1) + assert block.f_loc.padding == (1, 1) + assert block.f_loc.groups == 16 + assert block.f_loc.dilation == (1, 1) + assert block.f_loc.bias is None + + assert block.f_sur.in_channels == 16 + assert block.f_sur.out_channels == 16 + assert block.f_sur.kernel_size == (3, 3) + assert block.f_sur.stride == (1, 1) + assert block.f_sur.padding == (2, 2) + assert block.f_sur.groups == 16 + assert block.f_sur.dilation == (2, 2) + assert block.f_sur.bias is None + + x = torch.randn(1, 32, 32, 32) + x_out = block(x) + assert x_out.shape == torch.Size([1, 32, 32, 32]) + + +def test_cgnet_backbone(): + with pytest.raises(AssertionError): + # check invalid num_channels + CGNet(num_channels=(32, 64, 128, 256)) + + with pytest.raises(AssertionError): + # check invalid num_blocks + CGNet(num_blocks=(3, 21, 3)) + + with pytest.raises(AssertionError): + # check invalid dilation + CGNet(num_blocks=2) + + with pytest.raises(AssertionError): + # check invalid reduction + CGNet(reductions=16) + + with pytest.raises(AssertionError): + # check invalid num_channels and reduction + CGNet(num_channels=(32, 64, 128), reductions=(64, 129)) + + # Test CGNet with default settings + model = CGNet() + model.init_weights() + 
model.train() + + imgs = torch.randn(2, 3, 224, 224) + feat = model(imgs) + assert len(feat) == 3 + assert feat[0].shape == torch.Size([2, 35, 112, 112]) + assert feat[1].shape == torch.Size([2, 131, 56, 56]) + assert feat[2].shape == torch.Size([2, 256, 28, 28]) + + # Test CGNet with norm_eval True and with_cp True + model = CGNet(norm_eval=True, with_cp=True) + with pytest.raises(TypeError): + # check invalid pretrained + model.init_weights(pretrained=8) + model.init_weights() + model.train() + + imgs = torch.randn(2, 3, 224, 224) + feat = model(imgs) + assert len(feat) == 3 + assert feat[0].shape == torch.Size([2, 35, 112, 112]) + assert feat[1].shape == torch.Size([2, 131, 56, 56]) + assert feat[2].shape == torch.Size([2, 256, 28, 28]) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_erfnet.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_erfnet.py new file mode 100644 index 0000000..6ae7345 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_erfnet.py @@ -0,0 +1,146 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pytest +import torch + +from mmseg.models.backbones import ERFNet +from mmseg.models.backbones.erfnet import (DownsamplerBlock, NonBottleneck1d, + UpsamplerBlock) + + +def test_erfnet_backbone(): + # Test ERFNet Standard Forward. + model = ERFNet( + in_channels=3, + enc_downsample_channels=(16, 64, 128), + enc_stage_non_bottlenecks=(5, 8), + enc_non_bottleneck_dilations=(2, 4, 8, 16), + enc_non_bottleneck_channels=(64, 128), + dec_upsample_channels=(64, 16), + dec_stages_non_bottleneck=(2, 2), + dec_non_bottleneck_channels=(64, 16), + dropout_ratio=0.1, + ) + model.init_weights() + model.train() + batch_size = 2 + imgs = torch.randn(batch_size, 3, 256, 512) + output = model(imgs) + + # output for segment Head + assert output[0].shape == torch.Size([batch_size, 16, 128, 256]) + + # Test input with rare shape + batch_size = 2 + imgs = torch.randn(batch_size, 3, 527, 279) + output = model(imgs) + assert len(output[0]) == batch_size + + with pytest.raises(AssertionError): + # Number of encoder downsample block and decoder upsample block. + ERFNet( + in_channels=3, + enc_downsample_channels=(16, 64, 128), + enc_stage_non_bottlenecks=(5, 8), + enc_non_bottleneck_dilations=(2, 4, 8, 16), + enc_non_bottleneck_channels=(64, 128), + dec_upsample_channels=(128, 64, 16), + dec_stages_non_bottleneck=(2, 2), + dec_non_bottleneck_channels=(64, 16), + dropout_ratio=0.1, + ) + with pytest.raises(AssertionError): + # Number of encoder downsample block and encoder Non-bottleneck block. + ERFNet( + in_channels=3, + enc_downsample_channels=(16, 64, 128), + enc_stage_non_bottlenecks=(5, 8, 10), + enc_non_bottleneck_dilations=(2, 4, 8, 16), + enc_non_bottleneck_channels=(64, 128), + dec_upsample_channels=(64, 16), + dec_stages_non_bottleneck=(2, 2), + dec_non_bottleneck_channels=(64, 16), + dropout_ratio=0.1, + ) + with pytest.raises(AssertionError): + # Number of encoder downsample block and + # channels of encoder Non-bottleneck block. 
+ ERFNet( + in_channels=3, + enc_downsample_channels=(16, 64, 128), + enc_stage_non_bottlenecks=(5, 8), + enc_non_bottleneck_dilations=(2, 4, 8, 16), + enc_non_bottleneck_channels=(64, 128, 256), + dec_upsample_channels=(64, 16), + dec_stages_non_bottleneck=(2, 2), + dec_non_bottleneck_channels=(64, 16), + dropout_ratio=0.1, + ) + + with pytest.raises(AssertionError): + # Number of encoder Non-bottleneck block and number of its channels. + ERFNet( + in_channels=3, + enc_downsample_channels=(16, 64, 128), + enc_stage_non_bottlenecks=(5, 8, 3), + enc_non_bottleneck_dilations=(2, 4, 8, 16), + enc_non_bottleneck_channels=(64, 128), + dec_upsample_channels=(64, 16), + dec_stages_non_bottleneck=(2, 2), + dec_non_bottleneck_channels=(64, 16), + dropout_ratio=0.1, + ) + with pytest.raises(AssertionError): + # Number of decoder upsample block and decoder Non-bottleneck block. + ERFNet( + in_channels=3, + enc_downsample_channels=(16, 64, 128), + enc_stage_non_bottlenecks=(5, 8), + enc_non_bottleneck_dilations=(2, 4, 8, 16), + enc_non_bottleneck_channels=(64, 128), + dec_upsample_channels=(64, 16), + dec_stages_non_bottleneck=(2, 2, 3), + dec_non_bottleneck_channels=(64, 16), + dropout_ratio=0.1, + ) + with pytest.raises(AssertionError): + # Number of decoder Non-bottleneck block and number of its channels. + ERFNet( + in_channels=3, + enc_downsample_channels=(16, 64, 128), + enc_stage_non_bottlenecks=(5, 8), + enc_non_bottleneck_dilations=(2, 4, 8, 16), + enc_non_bottleneck_channels=(64, 128), + dec_upsample_channels=(64, 16), + dec_stages_non_bottleneck=(2, 2), + dec_non_bottleneck_channels=(64, 16, 8), + dropout_ratio=0.1, + ) + + +def test_erfnet_downsampler_block(): + x_db = DownsamplerBlock(16, 64) + assert x_db.conv.in_channels == 16 + assert x_db.conv.out_channels == 48 + assert len(x_db.bn.weight) == 64 + assert x_db.pool.kernel_size == 2 + assert x_db.pool.stride == 2 + + +def test_erfnet_non_bottleneck_1d(): + x_nb1d = NonBottleneck1d(16, 0, 1) + assert x_nb1d.convs_layers[0].in_channels == 16 + assert x_nb1d.convs_layers[0].out_channels == 16 + assert x_nb1d.convs_layers[2].in_channels == 16 + assert x_nb1d.convs_layers[2].out_channels == 16 + assert x_nb1d.convs_layers[5].in_channels == 16 + assert x_nb1d.convs_layers[5].out_channels == 16 + assert x_nb1d.convs_layers[7].in_channels == 16 + assert x_nb1d.convs_layers[7].out_channels == 16 + assert x_nb1d.convs_layers[9].p == 0 + + +def test_erfnet_upsampler_block(): + x_ub = UpsamplerBlock(64, 16) + assert x_ub.conv.in_channels == 64 + assert x_ub.conv.out_channels == 16 + assert len(x_ub.bn.weight) == 16 diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_fast_scnn.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_fast_scnn.py new file mode 100644 index 0000000..7ee638b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_fast_scnn.py @@ -0,0 +1,42 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pytest +import torch + +from mmseg.models.backbones import FastSCNN + + +def test_fastscnn_backbone(): + with pytest.raises(AssertionError): + # Fast-SCNN channel constraints. 
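+        # (global_out_channels=127 must equal lower_in_channels=128)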
+ FastSCNN( + 3, (32, 48), + 64, (64, 96, 128), (2, 2, 1), + global_out_channels=127, + higher_in_channels=64, + lower_in_channels=128) + + # Test FastSCNN Standard Forward + model = FastSCNN( + in_channels=3, + downsample_dw_channels=(4, 6), + global_in_channels=8, + global_block_channels=(8, 12, 16), + global_block_strides=(2, 2, 1), + global_out_channels=16, + higher_in_channels=8, + lower_in_channels=16, + fusion_out_channels=16, + ) + model.init_weights() + model.train() + batch_size = 4 + imgs = torch.randn(batch_size, 3, 64, 128) + feat = model(imgs) + + assert len(feat) == 3 + # higher-res + assert feat[0].shape == torch.Size([batch_size, 8, 8, 16]) + # lower-res + assert feat[1].shape == torch.Size([batch_size, 16, 2, 4]) + # FFM output + assert feat[2].shape == torch.Size([batch_size, 16, 8, 16]) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_hrnet.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_hrnet.py new file mode 100644 index 0000000..8329c84 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_hrnet.py @@ -0,0 +1,144 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pytest +import torch +from mmcv.utils.parrots_wrapper import _BatchNorm + +from mmseg.models.backbones.hrnet import HRModule, HRNet +from mmseg.models.backbones.resnet import BasicBlock, Bottleneck + + +@pytest.mark.parametrize('block', [BasicBlock, Bottleneck]) +def test_hrmodule(block): + # Test multiscale forward + num_channles = (32, 64) + in_channels = [c * block.expansion for c in num_channles] + hrmodule = HRModule( + num_branches=2, + blocks=block, + in_channels=in_channels, + num_blocks=(4, 4), + num_channels=num_channles, + ) + + feats = [ + torch.randn(1, in_channels[0], 64, 64), + torch.randn(1, in_channels[1], 32, 32) + ] + feats = hrmodule(feats) + + assert len(feats) == 2 + assert feats[0].shape == torch.Size([1, in_channels[0], 64, 64]) + assert feats[1].shape == torch.Size([1, in_channels[1], 32, 32]) + + # Test single scale forward + num_channles = (32, 64) + in_channels = [c * block.expansion for c in num_channles] + hrmodule = HRModule( + num_branches=2, + blocks=block, + in_channels=in_channels, + num_blocks=(4, 4), + num_channels=num_channles, + multiscale_output=False, + ) + + feats = [ + torch.randn(1, in_channels[0], 64, 64), + torch.randn(1, in_channels[1], 32, 32) + ] + feats = hrmodule(feats) + + assert len(feats) == 1 + assert feats[0].shape == torch.Size([1, in_channels[0], 64, 64]) + + +def test_hrnet_backbone(): + # only have 3 stages + extra = dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128))) + + with pytest.raises(AssertionError): + # HRNet now only support 4 stages + HRNet(extra=extra) + extra['stage4'] = dict( + num_modules=3, + num_branches=3, # should be 4 + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256)) + + with pytest.raises(AssertionError): + # len(num_blocks) should equal num_branches + HRNet(extra=extra) + + extra['stage4']['num_branches'] = 4 + + # Test hrnetv2p_w32 + model = HRNet(extra=extra) + model.init_weights() + model.train() + + imgs = torch.randn(1, 3, 64, 64) + feats = 
model(imgs) + assert len(feats) == 4 + assert feats[0].shape == torch.Size([1, 32, 16, 16]) + assert feats[3].shape == torch.Size([1, 256, 2, 2]) + + # Test single scale output + model = HRNet(extra=extra, multiscale_output=False) + model.init_weights() + model.train() + + imgs = torch.randn(1, 3, 64, 64) + feats = model(imgs) + assert len(feats) == 1 + assert feats[0].shape == torch.Size([1, 32, 16, 16]) + + # Test HRNET with two stage frozen + frozen_stages = 2 + model = HRNet(extra, frozen_stages=frozen_stages) + model.init_weights() + model.train() + assert model.norm1.training is False + + for layer in [model.conv1, model.norm1]: + for param in layer.parameters(): + assert param.requires_grad is False + for i in range(1, frozen_stages + 1): + if i == 1: + layer = getattr(model, f'layer{i}') + transition = getattr(model, f'transition{i}') + elif i == 4: + layer = getattr(model, f'stage{i}') + else: + layer = getattr(model, f'stage{i}') + transition = getattr(model, f'transition{i}') + + for mod in layer.modules(): + if isinstance(mod, _BatchNorm): + assert mod.training is False + for param in layer.parameters(): + assert param.requires_grad is False + + for mod in transition.modules(): + if isinstance(mod, _BatchNorm): + assert mod.training is False + for param in transition.parameters(): + assert param.requires_grad is False diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_icnet.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_icnet.py new file mode 100644 index 0000000..a96d8d8 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_icnet.py @@ -0,0 +1,50 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pytest +import torch + +from mmseg.models.backbones import ICNet + + +def test_icnet_backbone(): + with pytest.raises(TypeError): + # Must give backbone dict in config file. + ICNet( + in_channels=3, + layer_channels=(128, 512), + light_branch_middle_channels=8, + psp_out_channels=128, + out_channels=(16, 128, 128), + backbone_cfg=None) + + # Test ICNet Standard Forward + model = ICNet( + layer_channels=(128, 512), + backbone_cfg=dict( + type='ResNetV1c', + in_channels=3, + depth=18, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=False, + style='pytorch', + contract_dilation=True), + ) + assert hasattr(model.backbone, + 'maxpool') and model.backbone.maxpool.ceil_mode is True + model.init_weights() + model.train() + batch_size = 2 + imgs = torch.randn(batch_size, 3, 32, 64) + feat = model(imgs) + + assert model.psp_modules[0][0].output_size == 1 + assert model.psp_modules[1][0].output_size == 2 + assert model.psp_modules[2][0].output_size == 3 + assert model.psp_bottleneck.padding == 1 + assert model.conv_sub1[0].padding == 1 + + assert len(feat) == 3 + assert feat[0].shape == torch.Size([batch_size, 64, 4, 8]) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_mae.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_mae.py new file mode 100644 index 0000000..562d067 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_mae.py @@ -0,0 +1,183 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
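+# Tests for the MAE backbone: input-size handling, weight init and
+# relative/absolute position embedding resizing.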
+import pytest +import torch + +from mmseg.models.backbones.mae import MAE +from .utils import check_norm_state + + +def test_mae_backbone(): + with pytest.raises(TypeError): + # pretrained must be a string path + model = MAE() + model.init_weights(pretrained=0) + + with pytest.raises(TypeError): + # img_size must be int or tuple + model = MAE(img_size=512.0) + + with pytest.raises(TypeError): + # out_indices must be int ,list or tuple + model = MAE(out_indices=1.) + + with pytest.raises(AssertionError): + # The length of img_size tuple must be lower than 3. + MAE(img_size=(224, 224, 224)) + + with pytest.raises(TypeError): + # Pretrained must be None or Str. + MAE(pretrained=123) + + # Test img_size isinstance tuple + imgs = torch.randn(1, 3, 224, 224) + model = MAE(img_size=(224, )) + model.init_weights() + model(imgs) + + # Test img_size isinstance tuple + imgs = torch.randn(1, 3, 224, 224) + model = MAE(img_size=(224, 224)) + model(imgs) + + # Test norm_eval = True + model = MAE(norm_eval=True) + model.train() + + # Test BEiT backbone with input size of 224 and patch size of 16 + model = MAE() + model.init_weights() + model.train() + + # Test out_indices = list + model = MAE(out_indices=[2, 4, 8, 12]) + model.train() + + assert check_norm_state(model.modules(), True) + + # Test image size = (224, 224) + imgs = torch.randn(1, 3, 224, 224) + feat = model(imgs) + assert feat[-1].shape == (1, 768, 14, 14) + + # Test MAE backbone with input size of 256 and patch size of 16 + model = MAE(img_size=(256, 256)) + model.init_weights() + model.train() + imgs = torch.randn(1, 3, 256, 256) + feat = model(imgs) + assert feat[-1].shape == (1, 768, 16, 16) + + # Test MAE backbone with input size of 32 and patch size of 16 + model = MAE(img_size=(32, 32)) + model.init_weights() + model.train() + imgs = torch.randn(1, 3, 32, 32) + feat = model(imgs) + assert feat[-1].shape == (1, 768, 2, 2) + + # Test unbalanced size input image + model = MAE(img_size=(112, 224)) + model.init_weights() + model.train() + imgs = torch.randn(1, 3, 112, 224) + feat = model(imgs) + assert feat[-1].shape == (1, 768, 7, 14) + + # Test irregular input image + model = MAE(img_size=(234, 345)) + model.init_weights() + model.train() + imgs = torch.randn(1, 3, 234, 345) + feat = model(imgs) + assert feat[-1].shape == (1, 768, 14, 21) + + # Test init_values=0 + model = MAE(init_values=0) + imgs = torch.randn(1, 3, 224, 224) + feat = model(imgs) + assert feat[-1].shape == (1, 768, 14, 14) + + # Test final norm + model = MAE(final_norm=True) + imgs = torch.randn(1, 3, 224, 224) + feat = model(imgs) + assert feat[-1].shape == (1, 768, 14, 14) + + # Test patch norm + model = MAE(patch_norm=True) + imgs = torch.randn(1, 3, 224, 224) + feat = model(imgs) + assert feat[-1].shape == (1, 768, 14, 14) + + +def test_mae_init(): + path = 'PATH_THAT_DO_NOT_EXIST' + # Test all combinations of pretrained and init_cfg + # pretrained=None, init_cfg=None + model = MAE(pretrained=None, init_cfg=None) + assert model.init_cfg is None + model.init_weights() + + # pretrained=None + # init_cfg loads pretrain from an non-existent file + model = MAE( + pretrained=None, init_cfg=dict(type='Pretrained', checkpoint=path)) + assert model.init_cfg == dict(type='Pretrained', checkpoint=path) + # Test loading a checkpoint from an non-existent file + with pytest.raises(OSError): + model.init_weights() + + # test resize_rel_pos_embed + value = torch.randn(732, 16) + abs_pos_embed_value = torch.rand(1, 17, 768) + ckpt = { + 'state_dict': { + 
'layers.0.attn.relative_position_index': 0, + 'layers.0.attn.relative_position_bias_table': value, + 'pos_embed': abs_pos_embed_value + } + } + model = MAE(img_size=(512, 512)) + with pytest.raises(AttributeError): + model.resize_rel_pos_embed(ckpt) + + # test resize abs pos embed + ckpt = model.resize_abs_pos_embed(ckpt['state_dict']) + + # pretrained=None + # init_cfg=123, whose type is unsupported + model = MAE(pretrained=None, init_cfg=123) + with pytest.raises(TypeError): + model.init_weights() + + # pretrained loads pretrain from an non-existent file + # init_cfg=None + model = MAE(pretrained=path, init_cfg=None) + assert model.init_cfg == dict(type='Pretrained', checkpoint=path) + # Test loading a checkpoint from an non-existent file + with pytest.raises(OSError): + model.init_weights() + + # pretrained loads pretrain from an non-existent file + # init_cfg loads pretrain from an non-existent file + with pytest.raises(AssertionError): + model = MAE( + pretrained=path, init_cfg=dict(type='Pretrained', checkpoint=path)) + with pytest.raises(AssertionError): + model = MAE(pretrained=path, init_cfg=123) + + # pretrain=123, whose type is unsupported + # init_cfg=None + with pytest.raises(TypeError): + model = MAE(pretrained=123, init_cfg=None) + + # pretrain=123, whose type is unsupported + # init_cfg loads pretrain from an non-existent file + with pytest.raises(AssertionError): + model = MAE( + pretrained=123, init_cfg=dict(type='Pretrained', checkpoint=path)) + + # pretrain=123, whose type is unsupported + # init_cfg=123, whose type is unsupported + with pytest.raises(AssertionError): + model = MAE(pretrained=123, init_cfg=123) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_mit.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_mit.py new file mode 100644 index 0000000..72f74fe --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_mit.py @@ -0,0 +1,122 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pytest +import torch + +from mmseg.models.backbones import MixVisionTransformer +from mmseg.models.backbones.mit import (EfficientMultiheadAttention, MixFFN, + TransformerEncoderLayer) + + +def test_mit(): + with pytest.raises(TypeError): + # Pretrained represents pretrain url and must be str or None. 
+        MixVisionTransformer(pretrained=123)
+
+    # Test normal input
+    H, W = (224, 224)
+    temp = torch.randn((1, 3, H, W))
+    model = MixVisionTransformer(
+        embed_dims=32, num_heads=[1, 2, 5, 8], out_indices=(0, 1, 2, 3))
+    model.init_weights()
+    outs = model(temp)
+    assert outs[0].shape == (1, 32, H // 4, W // 4)
+    assert outs[1].shape == (1, 64, H // 8, W // 8)
+    assert outs[2].shape == (1, 160, H // 16, W // 16)
+    assert outs[3].shape == (1, 256, H // 32, W // 32)
+
+    # Test non-squared input
+    H, W = (224, 256)
+    temp = torch.randn((1, 3, H, W))
+    outs = model(temp)
+    assert outs[0].shape == (1, 32, H // 4, W // 4)
+    assert outs[1].shape == (1, 64, H // 8, W // 8)
+    assert outs[2].shape == (1, 160, H // 16, W // 16)
+    assert outs[3].shape == (1, 256, H // 32, W // 32)
+
+    # Test MixFFN
+    FFN = MixFFN(64, 128)
+    hw_shape = (32, 32)
+    token_len = 32 * 32
+    temp = torch.randn((1, token_len, 64))
+    # Self identity
+    out = FFN(temp, hw_shape)
+    assert out.shape == (1, token_len, 64)
+    # Out identity
+    out = FFN(temp, hw_shape, temp)
+    assert out.shape == (1, token_len, 64)
+
+    # Test EfficientMHA
+    MHA = EfficientMultiheadAttention(64, 2)
+    hw_shape = (32, 32)
+    token_len = 32 * 32
+    temp = torch.randn((1, token_len, 64))
+    # Self identity
+    out = MHA(temp, hw_shape)
+    assert out.shape == (1, token_len, 64)
+    # Out identity
+    out = MHA(temp, hw_shape, temp)
+    assert out.shape == (1, token_len, 64)
+
+    # Test TransformerEncoderLayer with checkpoint forward
+    block = TransformerEncoderLayer(
+        embed_dims=64, num_heads=4, feedforward_channels=256, with_cp=True)
+    assert block.with_cp
+    x = torch.randn(1, 56 * 56, 64)
+    x_out = block(x, (56, 56))
+    assert x_out.shape == torch.Size([1, 56 * 56, 64])
+
+
+def test_mit_init():
+    path = 'PATH_THAT_DO_NOT_EXIST'
+    # Test all combinations of pretrained and init_cfg
+    # pretrained=None, init_cfg=None
+    model = MixVisionTransformer(pretrained=None, init_cfg=None)
+    assert model.init_cfg is None
+    model.init_weights()
+
+    # pretrained=None
+    # init_cfg loads pretrain from a non-existent file
+    model = MixVisionTransformer(
+        pretrained=None, init_cfg=dict(type='Pretrained', checkpoint=path))
+    assert model.init_cfg == dict(type='Pretrained', checkpoint=path)
+    # Test loading a checkpoint from a non-existent file
+    with pytest.raises(OSError):
+        model.init_weights()
+
+    # pretrained=None
+    # init_cfg=123, whose type is unsupported
+    model = MixVisionTransformer(pretrained=None, init_cfg=123)
+    with pytest.raises(TypeError):
+        model.init_weights()
+
+    # pretrained loads pretrain from a non-existent file
+    # init_cfg=None
+    model = MixVisionTransformer(pretrained=path, init_cfg=None)
+    assert model.init_cfg == dict(type='Pretrained', checkpoint=path)
+    # Test loading a checkpoint from a non-existent file
+    with pytest.raises(OSError):
+        model.init_weights()
+
+    # pretrained loads pretrain from a non-existent file
+    # init_cfg loads pretrain from a non-existent file
+    with pytest.raises(AssertionError):
+        MixVisionTransformer(
+            pretrained=path, init_cfg=dict(type='Pretrained', checkpoint=path))
+    with pytest.raises(AssertionError):
+        MixVisionTransformer(pretrained=path, init_cfg=123)
+
+    # pretrained=123, whose type is unsupported
+    # init_cfg=None
+    with pytest.raises(TypeError):
+        MixVisionTransformer(pretrained=123, init_cfg=None)
+
+    # pretrained=123, whose type is unsupported
+    # init_cfg loads pretrain from a non-existent file
+    with pytest.raises(AssertionError):
+        MixVisionTransformer(
+            pretrained=123, init_cfg=dict(type='Pretrained',
checkpoint=path)) + + # pretrain=123, whose type is unsupported + # init_cfg=123, whose type is unsupported + with pytest.raises(AssertionError): + MixVisionTransformer(pretrained=123, init_cfg=123) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_mobilenet_v3.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_mobilenet_v3.py new file mode 100644 index 0000000..769ee14 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_mobilenet_v3.py @@ -0,0 +1,67 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pytest +import torch + +from mmseg.models.backbones import MobileNetV3 + + +def test_mobilenet_v3(): + with pytest.raises(AssertionError): + # check invalid arch + MobileNetV3('big') + + with pytest.raises(AssertionError): + # check invalid reduction_factor + MobileNetV3(reduction_factor=0) + + with pytest.raises(ValueError): + # check invalid out_indices + MobileNetV3(out_indices=(0, 1, 15)) + + with pytest.raises(ValueError): + # check invalid frozen_stages + MobileNetV3(frozen_stages=15) + + with pytest.raises(TypeError): + # check invalid pretrained + model = MobileNetV3() + model.init_weights(pretrained=8) + + # Test MobileNetV3 with default settings + model = MobileNetV3() + model.init_weights() + model.train() + + imgs = torch.randn(2, 3, 56, 56) + feat = model(imgs) + assert len(feat) == 3 + assert feat[0].shape == (2, 16, 28, 28) + assert feat[1].shape == (2, 16, 14, 14) + assert feat[2].shape == (2, 576, 7, 7) + + # Test MobileNetV3 with arch = 'large' + model = MobileNetV3(arch='large', out_indices=(1, 3, 16)) + model.init_weights() + model.train() + + imgs = torch.randn(2, 3, 56, 56) + feat = model(imgs) + assert len(feat) == 3 + assert feat[0].shape == (2, 16, 28, 28) + assert feat[1].shape == (2, 24, 14, 14) + assert feat[2].shape == (2, 960, 7, 7) + + # Test MobileNetV3 with norm_eval True, with_cp True and frozen_stages=5 + model = MobileNetV3(norm_eval=True, with_cp=True, frozen_stages=5) + with pytest.raises(TypeError): + # check invalid pretrained + model.init_weights(pretrained=8) + model.init_weights() + model.train() + + imgs = torch.randn(2, 3, 56, 56) + feat = model(imgs) + assert len(feat) == 3 + assert feat[0].shape == (2, 16, 28, 28) + assert feat[1].shape == (2, 16, 14, 14) + assert feat[2].shape == (2, 576, 7, 7) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_resnest.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_resnest.py new file mode 100644 index 0000000..3013f34 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_resnest.py @@ -0,0 +1,44 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
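+# Tests for the ResNeSt backbone and its split-attention bottleneck block.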
+import pytest +import torch + +from mmseg.models.backbones import ResNeSt +from mmseg.models.backbones.resnest import Bottleneck as BottleneckS + + +def test_resnest_bottleneck(): + with pytest.raises(AssertionError): + # Style must be in ['pytorch', 'caffe'] + BottleneckS(64, 64, radix=2, reduction_factor=4, style='tensorflow') + + # Test ResNeSt Bottleneck structure + block = BottleneckS( + 64, 256, radix=2, reduction_factor=4, stride=2, style='pytorch') + assert block.avd_layer.stride == 2 + assert block.conv2.channels == 256 + + # Test ResNeSt Bottleneck forward + block = BottleneckS(64, 16, radix=2, reduction_factor=4) + x = torch.randn(2, 64, 56, 56) + x_out = block(x) + assert x_out.shape == torch.Size([2, 64, 56, 56]) + + +def test_resnest_backbone(): + with pytest.raises(KeyError): + # ResNeSt depth should be in [50, 101, 152, 200] + ResNeSt(depth=18) + + # Test ResNeSt with radix 2, reduction_factor 4 + model = ResNeSt( + depth=50, radix=2, reduction_factor=4, out_indices=(0, 1, 2, 3)) + model.init_weights() + model.train() + + imgs = torch.randn(2, 3, 224, 224) + feat = model(imgs) + assert len(feat) == 4 + assert feat[0].shape == torch.Size([2, 256, 56, 56]) + assert feat[1].shape == torch.Size([2, 512, 28, 28]) + assert feat[2].shape == torch.Size([2, 1024, 14, 14]) + assert feat[3].shape == torch.Size([2, 2048, 7, 7]) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_resnet.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_resnet.py new file mode 100644 index 0000000..fa632f5 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_resnet.py @@ -0,0 +1,575 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pytest +import torch +from mmcv.ops import DeformConv2dPack +from mmcv.utils.parrots_wrapper import _BatchNorm +from torch.nn.modules import AvgPool2d, GroupNorm + +from mmseg.models.backbones import ResNet, ResNetV1d +from mmseg.models.backbones.resnet import BasicBlock, Bottleneck +from mmseg.models.utils import ResLayer +from .utils import all_zeros, check_norm_state, is_block, is_norm + + +def test_resnet_basic_block(): + with pytest.raises(AssertionError): + # Not implemented yet. + dcn = dict(type='DCN', deform_groups=1, fallback_on_stride=False) + BasicBlock(64, 64, dcn=dcn) + + with pytest.raises(AssertionError): + # Not implemented yet. + plugins = [ + dict( + cfg=dict(type='ContextBlock', ratio=1. 
/ 16), + position='after_conv3') + ] + BasicBlock(64, 64, plugins=plugins) + + with pytest.raises(AssertionError): + # Not implemented yet + plugins = [ + dict( + cfg=dict( + type='GeneralizedAttention', + spatial_range=-1, + num_heads=8, + attention_type='0010', + kv_stride=2), + position='after_conv2') + ] + BasicBlock(64, 64, plugins=plugins) + + # Test BasicBlock with checkpoint forward + block = BasicBlock(16, 16, with_cp=True) + assert block.with_cp + x = torch.randn(1, 16, 28, 28) + x_out = block(x) + assert x_out.shape == torch.Size([1, 16, 28, 28]) + + # test BasicBlock structure and forward + block = BasicBlock(32, 32) + assert block.conv1.in_channels == 32 + assert block.conv1.out_channels == 32 + assert block.conv1.kernel_size == (3, 3) + assert block.conv2.in_channels == 32 + assert block.conv2.out_channels == 32 + assert block.conv2.kernel_size == (3, 3) + x = torch.randn(1, 32, 28, 28) + x_out = block(x) + assert x_out.shape == torch.Size([1, 32, 28, 28]) + + +def test_resnet_bottleneck(): + with pytest.raises(AssertionError): + # Style must be in ['pytorch', 'caffe'] + Bottleneck(64, 64, style='tensorflow') + + with pytest.raises(AssertionError): + # Allowed positions are 'after_conv1', 'after_conv2', 'after_conv3' + plugins = [ + dict( + cfg=dict(type='ContextBlock', ratio=1. / 16), + position='after_conv4') + ] + Bottleneck(64, 16, plugins=plugins) + + with pytest.raises(AssertionError): + # Need to specify different postfix to avoid duplicate plugin name + plugins = [ + dict( + cfg=dict(type='ContextBlock', ratio=1. / 16), + position='after_conv3'), + dict( + cfg=dict(type='ContextBlock', ratio=1. / 16), + position='after_conv3') + ] + Bottleneck(64, 16, plugins=plugins) + + with pytest.raises(KeyError): + # Plugin type is not supported + plugins = [dict(cfg=dict(type='WrongPlugin'), position='after_conv3')] + Bottleneck(64, 16, plugins=plugins) + + # Test Bottleneck with checkpoint forward + block = Bottleneck(64, 16, with_cp=True) + assert block.with_cp + x = torch.randn(1, 64, 56, 56) + x_out = block(x) + assert x_out.shape == torch.Size([1, 64, 56, 56]) + + # Test Bottleneck style + block = Bottleneck(64, 64, stride=2, style='pytorch') + assert block.conv1.stride == (1, 1) + assert block.conv2.stride == (2, 2) + block = Bottleneck(64, 64, stride=2, style='caffe') + assert block.conv1.stride == (2, 2) + assert block.conv2.stride == (1, 1) + + # Test Bottleneck DCN + dcn = dict(type='DCN', deform_groups=1, fallback_on_stride=False) + with pytest.raises(AssertionError): + Bottleneck(64, 64, dcn=dcn, conv_cfg=dict(type='Conv')) + block = Bottleneck(64, 64, dcn=dcn) + assert isinstance(block.conv2, DeformConv2dPack) + + # Test Bottleneck forward + block = Bottleneck(64, 16) + x = torch.randn(1, 64, 56, 56) + x_out = block(x) + assert x_out.shape == torch.Size([1, 64, 56, 56]) + + # Test Bottleneck with 1 ContextBlock after conv3 + plugins = [ + dict( + cfg=dict(type='ContextBlock', ratio=1. 
/ 16), + position='after_conv3') + ] + block = Bottleneck(64, 16, plugins=plugins) + assert block.context_block.in_channels == 64 + x = torch.randn(1, 64, 56, 56) + x_out = block(x) + assert x_out.shape == torch.Size([1, 64, 56, 56]) + + # Test Bottleneck with 1 GeneralizedAttention after conv2 + plugins = [ + dict( + cfg=dict( + type='GeneralizedAttention', + spatial_range=-1, + num_heads=8, + attention_type='0010', + kv_stride=2), + position='after_conv2') + ] + block = Bottleneck(64, 16, plugins=plugins) + assert block.gen_attention_block.in_channels == 16 + x = torch.randn(1, 64, 56, 56) + x_out = block(x) + assert x_out.shape == torch.Size([1, 64, 56, 56]) + + # Test Bottleneck with 1 GeneralizedAttention after conv2, 1 NonLocal2d + # after conv2, 1 ContextBlock after conv3 + plugins = [ + dict( + cfg=dict( + type='GeneralizedAttention', + spatial_range=-1, + num_heads=8, + attention_type='0010', + kv_stride=2), + position='after_conv2'), + dict(cfg=dict(type='NonLocal2d'), position='after_conv2'), + dict( + cfg=dict(type='ContextBlock', ratio=1. / 16), + position='after_conv3') + ] + block = Bottleneck(64, 16, plugins=plugins) + assert block.gen_attention_block.in_channels == 16 + assert block.nonlocal_block.in_channels == 16 + assert block.context_block.in_channels == 64 + x = torch.randn(1, 64, 56, 56) + x_out = block(x) + assert x_out.shape == torch.Size([1, 64, 56, 56]) + + # Test Bottleneck with 1 ContextBlock after conv2, 2 ContextBlock after + # conv3 + plugins = [ + dict( + cfg=dict(type='ContextBlock', ratio=1. / 16, postfix=1), + position='after_conv2'), + dict( + cfg=dict(type='ContextBlock', ratio=1. / 16, postfix=2), + position='after_conv3'), + dict( + cfg=dict(type='ContextBlock', ratio=1. / 16, postfix=3), + position='after_conv3') + ] + block = Bottleneck(64, 16, plugins=plugins) + assert block.context_block1.in_channels == 16 + assert block.context_block2.in_channels == 64 + assert block.context_block3.in_channels == 64 + x = torch.randn(1, 64, 56, 56) + x_out = block(x) + assert x_out.shape == torch.Size([1, 64, 56, 56]) + + +def test_resnet_res_layer(): + # Test ResLayer of 3 Bottleneck w\o downsample + layer = ResLayer(Bottleneck, 64, 16, 3) + assert len(layer) == 3 + assert layer[0].conv1.in_channels == 64 + assert layer[0].conv1.out_channels == 16 + for i in range(1, len(layer)): + assert layer[i].conv1.in_channels == 64 + assert layer[i].conv1.out_channels == 16 + for i in range(len(layer)): + assert layer[i].downsample is None + x = torch.randn(1, 64, 56, 56) + x_out = layer(x) + assert x_out.shape == torch.Size([1, 64, 56, 56]) + + # Test ResLayer of 3 Bottleneck with downsample + layer = ResLayer(Bottleneck, 64, 64, 3) + assert layer[0].downsample[0].out_channels == 256 + for i in range(1, len(layer)): + assert layer[i].downsample is None + x = torch.randn(1, 64, 56, 56) + x_out = layer(x) + assert x_out.shape == torch.Size([1, 256, 56, 56]) + + # Test ResLayer of 3 Bottleneck with stride=2 + layer = ResLayer(Bottleneck, 64, 64, 3, stride=2) + assert layer[0].downsample[0].out_channels == 256 + assert layer[0].downsample[0].stride == (2, 2) + for i in range(1, len(layer)): + assert layer[i].downsample is None + x = torch.randn(1, 64, 56, 56) + x_out = layer(x) + assert x_out.shape == torch.Size([1, 256, 28, 28]) + + # Test ResLayer of 3 Bottleneck with stride=2 and average downsample + layer = ResLayer(Bottleneck, 64, 64, 3, stride=2, avg_down=True) + assert isinstance(layer[0].downsample[0], AvgPool2d) + assert layer[0].downsample[1].out_channels == 256 + 
assert layer[0].downsample[1].stride == (1, 1) + for i in range(1, len(layer)): + assert layer[i].downsample is None + x = torch.randn(1, 64, 56, 56) + x_out = layer(x) + assert x_out.shape == torch.Size([1, 256, 28, 28]) + + # Test ResLayer of 3 Bottleneck with dilation=2 + layer = ResLayer(Bottleneck, 64, 16, 3, dilation=2) + for i in range(len(layer)): + assert layer[i].conv2.dilation == (2, 2) + x = torch.randn(1, 64, 56, 56) + x_out = layer(x) + assert x_out.shape == torch.Size([1, 64, 56, 56]) + + # Test ResLayer of 3 Bottleneck with dilation=2, contract_dilation=True + layer = ResLayer(Bottleneck, 64, 16, 3, dilation=2, contract_dilation=True) + assert layer[0].conv2.dilation == (1, 1) + for i in range(1, len(layer)): + assert layer[i].conv2.dilation == (2, 2) + x = torch.randn(1, 64, 56, 56) + x_out = layer(x) + assert x_out.shape == torch.Size([1, 64, 56, 56]) + + # Test ResLayer of 3 Bottleneck with dilation=2, multi_grid + layer = ResLayer(Bottleneck, 64, 16, 3, dilation=2, multi_grid=(1, 2, 4)) + assert layer[0].conv2.dilation == (1, 1) + assert layer[1].conv2.dilation == (2, 2) + assert layer[2].conv2.dilation == (4, 4) + x = torch.randn(1, 64, 56, 56) + x_out = layer(x) + assert x_out.shape == torch.Size([1, 64, 56, 56]) + + +def test_resnet_backbone(): + """Test resnet backbone.""" + with pytest.raises(KeyError): + # ResNet depth should be in [18, 34, 50, 101, 152] + ResNet(20) + + with pytest.raises(AssertionError): + # In ResNet: 1 <= num_stages <= 4 + ResNet(50, num_stages=0) + + with pytest.raises(AssertionError): + # len(stage_with_dcn) == num_stages + dcn = dict(type='DCN', deform_groups=1, fallback_on_stride=False) + ResNet(50, dcn=dcn, stage_with_dcn=(True, )) + + with pytest.raises(AssertionError): + # len(stage_with_plugin) == num_stages + plugins = [ + dict( + cfg=dict(type='ContextBlock', ratio=1. 
/ 16), + stages=(False, True, True), + position='after_conv3') + ] + ResNet(50, plugins=plugins) + + with pytest.raises(AssertionError): + # In ResNet: 1 <= num_stages <= 4 + ResNet(18, num_stages=5) + + with pytest.raises(AssertionError): + # len(strides) == len(dilations) == num_stages + ResNet(18, strides=(1, ), dilations=(1, 1), num_stages=3) + + with pytest.raises(TypeError): + # pretrained must be a string path + model = ResNet(18, pretrained=0) + model.init_weights() + + with pytest.raises(AssertionError): + # Style must be in ['pytorch', 'caffe'] + ResNet(50, style='tensorflow') + + # Test ResNet18 norm_eval=True + model = ResNet(18, norm_eval=True) + model.init_weights() + model.train() + assert check_norm_state(model.modules(), False) + + # Test ResNet18 with torchvision pretrained weight + model = ResNet( + depth=18, norm_eval=True, pretrained='torchvision://resnet18') + model.init_weights() + model.train() + assert check_norm_state(model.modules(), False) + + # Test ResNet18 with first stage frozen + frozen_stages = 1 + model = ResNet(18, frozen_stages=frozen_stages) + model.init_weights() + model.train() + assert model.norm1.training is False + for layer in [model.conv1, model.norm1]: + for param in layer.parameters(): + assert param.requires_grad is False + for i in range(1, frozen_stages + 1): + layer = getattr(model, 'layer{}'.format(i)) + for mod in layer.modules(): + if isinstance(mod, _BatchNorm): + assert mod.training is False + for param in layer.parameters(): + assert param.requires_grad is False + + # Test ResNet18V1d with first stage frozen + model = ResNetV1d(depth=18, frozen_stages=frozen_stages) + assert len(model.stem) == 9 + model.init_weights() + model.train() + check_norm_state(model.stem, False) + for param in model.stem.parameters(): + assert param.requires_grad is False + for i in range(1, frozen_stages + 1): + layer = getattr(model, 'layer{}'.format(i)) + for mod in layer.modules(): + if isinstance(mod, _BatchNorm): + assert mod.training is False + for param in layer.parameters(): + assert param.requires_grad is False + + # Test ResNet18 forward + model = ResNet(18) + model.init_weights() + model.train() + + imgs = torch.randn(1, 3, 224, 224) + feat = model(imgs) + assert len(feat) == 4 + assert feat[0].shape == torch.Size([1, 64, 56, 56]) + assert feat[1].shape == torch.Size([1, 128, 28, 28]) + assert feat[2].shape == torch.Size([1, 256, 14, 14]) + assert feat[3].shape == torch.Size([1, 512, 7, 7]) + + # Test ResNet18 with BatchNorm forward + model = ResNet(18) + for m in model.modules(): + if is_norm(m): + assert isinstance(m, _BatchNorm) + model.init_weights() + model.train() + + imgs = torch.randn(1, 3, 224, 224) + feat = model(imgs) + assert len(feat) == 4 + assert feat[0].shape == torch.Size([1, 64, 56, 56]) + assert feat[1].shape == torch.Size([1, 128, 28, 28]) + assert feat[2].shape == torch.Size([1, 256, 14, 14]) + assert feat[3].shape == torch.Size([1, 512, 7, 7]) + + # Test ResNet18 with layers 1, 2, 3 out forward + model = ResNet(18, out_indices=(0, 1, 2)) + model.init_weights() + model.train() + + imgs = torch.randn(1, 3, 112, 112) + feat = model(imgs) + assert len(feat) == 3 + assert feat[0].shape == torch.Size([1, 64, 28, 28]) + assert feat[1].shape == torch.Size([1, 128, 14, 14]) + assert feat[2].shape == torch.Size([1, 256, 7, 7]) + + # Test ResNet18 with checkpoint forward + model = ResNet(18, with_cp=True) + for m in model.modules(): + if is_block(m): + assert m.with_cp + model.init_weights() + model.train() + + imgs = torch.randn(1, 3, 
224, 224)
+    feat = model(imgs)
+    assert len(feat) == 4
+    assert feat[0].shape == torch.Size([1, 64, 56, 56])
+    assert feat[1].shape == torch.Size([1, 128, 28, 28])
+    assert feat[2].shape == torch.Size([1, 256, 14, 14])
+    assert feat[3].shape == torch.Size([1, 512, 7, 7])
+
+    # Test ResNet18 with GroupNorm forward
+    model = ResNet(
+        18, norm_cfg=dict(type='GN', num_groups=32, requires_grad=True))
+    for m in model.modules():
+        if is_norm(m):
+            assert isinstance(m, GroupNorm)
+    model.init_weights()
+    model.train()
+
+    imgs = torch.randn(1, 3, 224, 224)
+    feat = model(imgs)
+    assert len(feat) == 4
+    assert feat[0].shape == torch.Size([1, 64, 56, 56])
+    assert feat[1].shape == torch.Size([1, 128, 28, 28])
+    assert feat[2].shape == torch.Size([1, 256, 14, 14])
+    assert feat[3].shape == torch.Size([1, 512, 7, 7])
+
+    # Test ResNet50 with 1 GeneralizedAttention after conv2 in layers
+    # 2, 3, 4, 1 NonLocal2d after conv2 in every layer and 1 ContextBlock
+    # after conv3 in layers 2, 3
+    plugins = [
+        dict(
+            cfg=dict(
+                type='GeneralizedAttention',
+                spatial_range=-1,
+                num_heads=8,
+                attention_type='0010',
+                kv_stride=2),
+            stages=(False, True, True, True),
+            position='after_conv2'),
+        dict(cfg=dict(type='NonLocal2d'), position='after_conv2'),
+        dict(
+            cfg=dict(type='ContextBlock', ratio=1. / 16),
+            stages=(False, True, True, False),
+            position='after_conv3')
+    ]
+    model = ResNet(50, plugins=plugins)
+    for m in model.layer1.modules():
+        if is_block(m):
+            assert not hasattr(m, 'context_block')
+            assert not hasattr(m, 'gen_attention_block')
+            assert m.nonlocal_block.in_channels == 64
+    for m in model.layer2.modules():
+        if is_block(m):
+            assert m.nonlocal_block.in_channels == 128
+            assert m.gen_attention_block.in_channels == 128
+            assert m.context_block.in_channels == 512
+
+    for m in model.layer3.modules():
+        if is_block(m):
+            assert m.nonlocal_block.in_channels == 256
+            assert m.gen_attention_block.in_channels == 256
+            assert m.context_block.in_channels == 1024
+
+    for m in model.layer4.modules():
+        if is_block(m):
+            assert m.nonlocal_block.in_channels == 512
+            assert m.gen_attention_block.in_channels == 512
+            assert not hasattr(m, 'context_block')
+    model.init_weights()
+    model.train()
+
+    imgs = torch.randn(1, 3, 224, 224)
+    feat = model(imgs)
+    assert len(feat) == 4
+    assert feat[0].shape == torch.Size([1, 256, 56, 56])
+    assert feat[1].shape == torch.Size([1, 512, 28, 28])
+    assert feat[2].shape == torch.Size([1, 1024, 14, 14])
+    assert feat[3].shape == torch.Size([1, 2048, 7, 7])
+
+    # Test ResNet50 with 2 ContextBlocks after conv3 in layers 2, 3
+    plugins = [
+        dict(
+            cfg=dict(type='ContextBlock', ratio=1. / 16, postfix=1),
+            stages=(False, True, True, False),
+            position='after_conv3'),
+        dict(
+            cfg=dict(type='ContextBlock', ratio=1.
/ 16, postfix=2), + stages=(False, True, True, False), + position='after_conv3') + ] + + model = ResNet(50, plugins=plugins) + for m in model.layer1.modules(): + if is_block(m): + assert not hasattr(m, 'context_block') + assert not hasattr(m, 'context_block1') + assert not hasattr(m, 'context_block2') + for m in model.layer2.modules(): + if is_block(m): + assert not hasattr(m, 'context_block') + assert m.context_block1.in_channels == 512 + assert m.context_block2.in_channels == 512 + + for m in model.layer3.modules(): + if is_block(m): + assert not hasattr(m, 'context_block') + assert m.context_block1.in_channels == 1024 + assert m.context_block2.in_channels == 1024 + + for m in model.layer4.modules(): + if is_block(m): + assert not hasattr(m, 'context_block') + assert not hasattr(m, 'context_block1') + assert not hasattr(m, 'context_block2') + model.init_weights() + model.train() + + imgs = torch.randn(1, 3, 224, 224) + feat = model(imgs) + assert len(feat) == 4 + assert feat[0].shape == torch.Size([1, 256, 56, 56]) + assert feat[1].shape == torch.Size([1, 512, 28, 28]) + assert feat[2].shape == torch.Size([1, 1024, 14, 14]) + assert feat[3].shape == torch.Size([1, 2048, 7, 7]) + + # Test ResNet18 zero initialization of residual + model = ResNet(18, zero_init_residual=True) + model.init_weights() + for m in model.modules(): + if isinstance(m, Bottleneck): + assert all_zeros(m.norm3) + elif isinstance(m, BasicBlock): + assert all_zeros(m.norm2) + model.train() + + imgs = torch.randn(1, 3, 224, 224) + feat = model(imgs) + assert len(feat) == 4 + assert feat[0].shape == torch.Size([1, 64, 56, 56]) + assert feat[1].shape == torch.Size([1, 128, 28, 28]) + assert feat[2].shape == torch.Size([1, 256, 14, 14]) + assert feat[3].shape == torch.Size([1, 512, 7, 7]) + + # Test ResNetV1d forward + model = ResNetV1d(depth=18) + model.init_weights() + model.train() + + imgs = torch.randn(1, 3, 224, 224) + feat = model(imgs) + assert len(feat) == 4 + assert feat[0].shape == torch.Size([1, 64, 56, 56]) + assert feat[1].shape == torch.Size([1, 128, 28, 28]) + assert feat[2].shape == torch.Size([1, 256, 14, 14]) + assert feat[3].shape == torch.Size([1, 512, 7, 7]) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_resnext.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_resnext.py new file mode 100644 index 0000000..2aecaf0 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_resnext.py @@ -0,0 +1,62 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
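+# Editor's note: ResNeXt replaces the bottleneck's 3x3 conv with a grouped
+# conv; with groups=32 and base_width=4, 64 planes widen to
+# 64 * (4 / 64) * 32 = 128 mid channels, matching the asserts below.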
+import pytest
+import torch
+
+from mmseg.models.backbones import ResNeXt
+from mmseg.models.backbones.resnext import Bottleneck as BottleneckX
+from .utils import is_block
+
+
+def test_resnext_bottleneck():
+    with pytest.raises(AssertionError):
+        # Style must be in ['pytorch', 'caffe']
+        BottleneckX(64, 64, groups=32, base_width=4, style='tensorflow')
+
+    # Test ResNeXt Bottleneck structure
+    block = BottleneckX(
+        64, 64, groups=32, base_width=4, stride=2, style='pytorch')
+    assert block.conv2.stride == (2, 2)
+    assert block.conv2.groups == 32
+    assert block.conv2.out_channels == 128
+
+    # Test ResNeXt Bottleneck with DCN
+    dcn = dict(type='DCN', deform_groups=1, fallback_on_stride=False)
+    with pytest.raises(AssertionError):
+        # conv_cfg must be None if dcn is not None
+        BottleneckX(
+            64,
+            64,
+            groups=32,
+            base_width=4,
+            dcn=dcn,
+            conv_cfg=dict(type='Conv'))
+    BottleneckX(64, 64, dcn=dcn)
+
+    # Test ResNeXt Bottleneck forward
+    block = BottleneckX(64, 16, groups=32, base_width=4)
+    x = torch.randn(1, 64, 56, 56)
+    x_out = block(x)
+    assert x_out.shape == torch.Size([1, 64, 56, 56])
+
+
+def test_resnext_backbone():
+    with pytest.raises(KeyError):
+        # ResNeXt depth should be in [50, 101, 152]
+        ResNeXt(depth=18)
+
+    # Test ResNeXt with groups=32, base_width=4
+    model = ResNeXt(depth=50, groups=32, base_width=4)
+    print(model)
+    for m in model.modules():
+        if is_block(m):
+            assert m.conv2.groups == 32
+    model.init_weights()
+    model.train()
+
+    imgs = torch.randn(1, 3, 224, 224)
+    feat = model(imgs)
+    assert len(feat) == 4
+    assert feat[0].shape == torch.Size([1, 256, 56, 56])
+    assert feat[1].shape == torch.Size([1, 512, 28, 28])
+    assert feat[2].shape == torch.Size([1, 1024, 14, 14])
+    assert feat[3].shape == torch.Size([1, 2048, 7, 7])
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_stdc.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_stdc.py
new file mode 100644
index 0000000..1e3862b
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_stdc.py
@@ -0,0 +1,131 @@
+# Copyright (c) OpenMMLab. All rights reserved.
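+# Editor's note: STDC ("Short-Term Dense Concatenate", from Rethinking
+# BiSeNet, CVPR 2021) pairs an STDCNet backbone with a BiSeNet-style context
+# path; the tests below cover the assembled net, the backbone constraints
+# and the fusion/refinement modules.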
+import pytest
+import torch
+
+from mmseg.models.backbones import STDCContextPathNet
+from mmseg.models.backbones.stdc import (AttentionRefinementModule,
+                                         FeatureFusionModule, STDCModule,
+                                         STDCNet)
+
+
+def test_stdc_context_path_net():
+    # Test STDCContextPathNet standard forward
+    model = STDCContextPathNet(
+        backbone_cfg=dict(
+            type='STDCNet',
+            stdc_type='STDCNet1',
+            in_channels=3,
+            channels=(32, 64, 256, 512, 1024),
+            bottleneck_type='cat',
+            num_convs=4,
+            norm_cfg=dict(type='BN', requires_grad=True),
+            act_cfg=dict(type='ReLU'),
+            with_final_conv=True),
+        last_in_channels=(1024, 512),
+        out_channels=128,
+        ffm_cfg=dict(in_channels=384, out_channels=256, scale_factor=4))
+    model.init_weights()
+    model.train()
+    batch_size = 2
+    imgs = torch.randn(batch_size, 3, 256, 512)
+    feat = model(imgs)
+
+    assert len(feat) == 4
+    # output for the segmentation head
+    assert feat[0].shape == torch.Size([batch_size, 256, 32, 64])
+    # for auxiliary head 1
+    assert feat[1].shape == torch.Size([batch_size, 128, 16, 32])
+    # for auxiliary head 2
+    assert feat[2].shape == torch.Size([batch_size, 128, 32, 64])
+    # for auxiliary head 3
+    assert feat[3].shape == torch.Size([batch_size, 256, 32, 64])
+
+    # Test input with an irregular shape
+    batch_size = 2
+    imgs = torch.randn(batch_size, 3, 527, 279)
+    model = STDCContextPathNet(
+        backbone_cfg=dict(
+            type='STDCNet',
+            stdc_type='STDCNet1',
+            in_channels=3,
+            channels=(32, 64, 256, 512, 1024),
+            bottleneck_type='add',
+            num_convs=4,
+            norm_cfg=dict(type='BN', requires_grad=True),
+            act_cfg=dict(type='ReLU'),
+            with_final_conv=False),
+        last_in_channels=(1024, 512),
+        out_channels=128,
+        ffm_cfg=dict(in_channels=384, out_channels=256, scale_factor=4))
+    model.init_weights()
+    model.train()
+    feat = model(imgs)
+    assert len(feat) == 4
+
+
+def test_stdcnet():
+    with pytest.raises(AssertionError):
+        # STDC backbone constraints.
+        STDCNet(
+            stdc_type='STDCNet3',
+            in_channels=3,
+            channels=(32, 64, 256, 512, 1024),
+            bottleneck_type='cat',
+            num_convs=4,
+            norm_cfg=dict(type='BN', requires_grad=True),
+            act_cfg=dict(type='ReLU'),
+            with_final_conv=False)
+
+    with pytest.raises(AssertionError):
+        # STDC bottleneck type constraints.
+        STDCNet(
+            stdc_type='STDCNet1',
+            in_channels=3,
+            channels=(32, 64, 256, 512, 1024),
+            bottleneck_type='dog',
+            num_convs=4,
+            norm_cfg=dict(type='BN', requires_grad=True),
+            act_cfg=dict(type='ReLU'),
+            with_final_conv=False)
+
+    with pytest.raises(AssertionError):
+        # STDC channels length constraints.
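+        # (the valid configs above pass five channel entries; this
+        # six-entry tuple is expected to trip the length assertion.)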
+        STDCNet(
+            stdc_type='STDCNet1',
+            in_channels=3,
+            channels=(16, 32, 64, 256, 512, 1024),
+            bottleneck_type='cat',
+            num_convs=4,
+            norm_cfg=dict(type='BN', requires_grad=True),
+            act_cfg=dict(type='ReLU'),
+            with_final_conv=False)
+
+
+def test_feature_fusion_module():
+    x_ffm = FeatureFusionModule(in_channels=64, out_channels=32)
+    assert x_ffm.conv0.in_channels == 64
+    assert x_ffm.attention[1].in_channels == 32
+    assert x_ffm.attention[2].in_channels == 8
+    assert x_ffm.attention[2].out_channels == 32
+    x1 = torch.randn(2, 32, 32, 64)
+    x2 = torch.randn(2, 32, 32, 64)
+    x_out = x_ffm(x1, x2)
+    assert x_out.shape == torch.Size([2, 32, 32, 64])
+
+
+def test_attention_refinement_module():
+    x_arm = AttentionRefinementModule(128, 32)
+    assert x_arm.conv_layer.in_channels == 128
+    assert x_arm.atten_conv_layer[1].conv.out_channels == 32
+    x = torch.randn(2, 128, 32, 64)
+    x_out = x_arm(x)
+    assert x_out.shape == torch.Size([2, 32, 32, 64])
+
+
+def test_stdc_module():
+    x_stdc = STDCModule(in_channels=32, out_channels=32, stride=4)
+    assert x_stdc.layers[0].conv.in_channels == 32
+    assert x_stdc.layers[3].conv.out_channels == 4
+    x = torch.randn(2, 32, 32, 64)
+    x_out = x_stdc(x)
+    assert x_out.shape == torch.Size([2, 32, 32, 64])
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_swin.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_swin.py
new file mode 100644
index 0000000..8d14d47
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_swin.py
@@ -0,0 +1,100 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import pytest
+import torch
+
+from mmseg.models.backbones.swin import SwinBlock, SwinTransformer
+
+
+def test_swin_block():
+    # test SwinBlock structure and forward
+    block = SwinBlock(embed_dims=32, num_heads=4, feedforward_channels=128)
+    assert block.ffn.embed_dims == 32
+    assert block.attn.w_msa.num_heads == 4
+    assert block.ffn.feedforward_channels == 128
+    x = torch.randn(1, 56 * 56, 32)
+    x_out = block(x, (56, 56))
+    assert x_out.shape == torch.Size([1, 56 * 56, 32])
+
+    # Test SwinBlock with checkpoint forward
+    block = SwinBlock(
+        embed_dims=64, num_heads=4, feedforward_channels=256, with_cp=True)
+    assert block.with_cp
+    x = torch.randn(1, 56 * 56, 64)
+    x_out = block(x, (56, 56))
+    assert x_out.shape == torch.Size([1, 56 * 56, 64])
+
+
+def test_swin_transformer():
+    """Test Swin Transformer backbone."""
+
+    with pytest.raises(TypeError):
+        # Pretrained arg must be str or None.
+        SwinTransformer(pretrained=123)
+
+    with pytest.raises(AssertionError):
+        # Swin uses non-overlapping patch embedding, so the stride of the
+        # patch embed must equal the patch size.
+ SwinTransformer(strides=(2, 2, 2, 2), patch_size=4) + + # test pretrained image size + with pytest.raises(AssertionError): + SwinTransformer(pretrain_img_size=(112, 112, 112)) + + # Test absolute position embedding + temp = torch.randn((1, 3, 112, 112)) + model = SwinTransformer(pretrain_img_size=112, use_abs_pos_embed=True) + model.init_weights() + model(temp) + + # Test patch norm + model = SwinTransformer(patch_norm=False) + model(temp) + + # Test normal inference + temp = torch.randn((1, 3, 256, 256)) + model = SwinTransformer() + outs = model(temp) + assert outs[0].shape == (1, 96, 64, 64) + assert outs[1].shape == (1, 192, 32, 32) + assert outs[2].shape == (1, 384, 16, 16) + assert outs[3].shape == (1, 768, 8, 8) + + # Test abnormal inference size + temp = torch.randn((1, 3, 255, 255)) + model = SwinTransformer() + outs = model(temp) + assert outs[0].shape == (1, 96, 64, 64) + assert outs[1].shape == (1, 192, 32, 32) + assert outs[2].shape == (1, 384, 16, 16) + assert outs[3].shape == (1, 768, 8, 8) + + # Test abnormal inference size + temp = torch.randn((1, 3, 112, 137)) + model = SwinTransformer() + outs = model(temp) + assert outs[0].shape == (1, 96, 28, 35) + assert outs[1].shape == (1, 192, 14, 18) + assert outs[2].shape == (1, 384, 7, 9) + assert outs[3].shape == (1, 768, 4, 5) + + # Test frozen + model = SwinTransformer(frozen_stages=4) + model.train() + for p in model.parameters(): + assert not p.requires_grad + + # Test absolute position embedding frozen + model = SwinTransformer(frozen_stages=4, use_abs_pos_embed=True) + model.train() + for p in model.parameters(): + assert not p.requires_grad + + # Test Swin with checkpoint forward + temp = torch.randn((1, 3, 56, 56)) + model = SwinTransformer(with_cp=True) + for m in model.modules(): + if isinstance(m, SwinBlock): + assert m.with_cp + model.init_weights() + model.train() + model(temp) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_timm_backbone.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_timm_backbone.py new file mode 100644 index 0000000..85ef9aa --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_timm_backbone.py @@ -0,0 +1,133 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
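+# Editor's note (assumption about the timm wrapper): features_only=True
+# requests timm's multi-scale feature extractor, and output_stride < 32 is
+# realized with dilation, which is why the deeper maps below stop shrinking
+# while their channel counts keep growing.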
+import pytest +import torch + +from mmseg.models.backbones import TIMMBackbone +from .utils import check_norm_state + + +def test_timm_backbone(): + with pytest.raises(TypeError): + # pretrained must be a string path + model = TIMMBackbone() + model.init_weights(pretrained=0) + + # Test different norm_layer, can be: 'SyncBN', 'BN2d', 'GN', 'LN', 'IN' + # Test resnet18 from timm, norm_layer='BN2d' + model = TIMMBackbone( + model_name='resnet18', + features_only=True, + pretrained=False, + output_stride=32, + norm_layer='BN2d') + + # Test resnet18 from timm, norm_layer='SyncBN' + model = TIMMBackbone( + model_name='resnet18', + features_only=True, + pretrained=False, + output_stride=32, + norm_layer='SyncBN') + + # Test resnet18 from timm, features_only=True, output_stride=32 + model = TIMMBackbone( + model_name='resnet18', + features_only=True, + pretrained=False, + output_stride=32) + model.init_weights() + model.train() + assert check_norm_state(model.modules(), True) + + imgs = torch.randn(1, 3, 224, 224) + feats = model(imgs) + feats = [feat.shape for feat in feats] + assert len(feats) == 5 + assert feats[0] == torch.Size((1, 64, 112, 112)) + assert feats[1] == torch.Size((1, 64, 56, 56)) + assert feats[2] == torch.Size((1, 128, 28, 28)) + assert feats[3] == torch.Size((1, 256, 14, 14)) + assert feats[4] == torch.Size((1, 512, 7, 7)) + + # Test resnet18 from timm, features_only=True, output_stride=16 + model = TIMMBackbone( + model_name='resnet18', + features_only=True, + pretrained=False, + output_stride=16) + imgs = torch.randn(1, 3, 224, 224) + feats = model(imgs) + feats = [feat.shape for feat in feats] + assert len(feats) == 5 + assert feats[0] == torch.Size((1, 64, 112, 112)) + assert feats[1] == torch.Size((1, 64, 56, 56)) + assert feats[2] == torch.Size((1, 128, 28, 28)) + assert feats[3] == torch.Size((1, 256, 14, 14)) + assert feats[4] == torch.Size((1, 512, 14, 14)) + + # Test resnet18 from timm, features_only=True, output_stride=8 + model = TIMMBackbone( + model_name='resnet18', + features_only=True, + pretrained=False, + output_stride=8) + imgs = torch.randn(1, 3, 224, 224) + feats = model(imgs) + feats = [feat.shape for feat in feats] + assert len(feats) == 5 + assert feats[0] == torch.Size((1, 64, 112, 112)) + assert feats[1] == torch.Size((1, 64, 56, 56)) + assert feats[2] == torch.Size((1, 128, 28, 28)) + assert feats[3] == torch.Size((1, 256, 28, 28)) + assert feats[4] == torch.Size((1, 512, 28, 28)) + + # Test efficientnet_b1 with pretrained weights + model = TIMMBackbone(model_name='efficientnet_b1', pretrained=True) + + # Test resnetv2_50x1_bitm from timm, features_only=True, output_stride=8 + model = TIMMBackbone( + model_name='resnetv2_50x1_bitm', + features_only=True, + pretrained=False, + output_stride=8) + imgs = torch.randn(1, 3, 8, 8) + feats = model(imgs) + feats = [feat.shape for feat in feats] + assert len(feats) == 5 + assert feats[0] == torch.Size((1, 64, 4, 4)) + assert feats[1] == torch.Size((1, 256, 2, 2)) + assert feats[2] == torch.Size((1, 512, 1, 1)) + assert feats[3] == torch.Size((1, 1024, 1, 1)) + assert feats[4] == torch.Size((1, 2048, 1, 1)) + + # Test resnetv2_50x3_bitm from timm, features_only=True, output_stride=8 + model = TIMMBackbone( + model_name='resnetv2_50x3_bitm', + features_only=True, + pretrained=False, + output_stride=8) + imgs = torch.randn(1, 3, 8, 8) + feats = model(imgs) + feats = [feat.shape for feat in feats] + assert len(feats) == 5 + assert feats[0] == torch.Size((1, 192, 4, 4)) + assert feats[1] == torch.Size((1, 768, 
2, 2))
+    assert feats[2] == torch.Size((1, 1536, 1, 1))
+    assert feats[3] == torch.Size((1, 3072, 1, 1))
+    assert feats[4] == torch.Size((1, 6144, 1, 1))
+
+    # Test resnetv2_101x1_bitm from timm, features_only=True, output_stride=8
+    model = TIMMBackbone(
+        model_name='resnetv2_101x1_bitm',
+        features_only=True,
+        pretrained=False,
+        output_stride=8)
+    imgs = torch.randn(1, 3, 8, 8)
+    feats = model(imgs)
+    feats = [feat.shape for feat in feats]
+    assert len(feats) == 5
+    assert feats[0] == torch.Size((1, 64, 4, 4))
+    assert feats[1] == torch.Size((1, 256, 2, 2))
+    assert feats[2] == torch.Size((1, 512, 1, 1))
+    assert feats[3] == torch.Size((1, 1024, 1, 1))
+    assert feats[4] == torch.Size((1, 2048, 1, 1))
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_twins.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_twins.py
new file mode 100644
index 0000000..aa3eaf9
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_twins.py
@@ -0,0 +1,171 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import pytest
+import torch
+
+from mmseg.models.backbones.twins import (PCPVT, SVT,
+                                          ConditionalPositionEncoding,
+                                          LocallyGroupedSelfAttention)
+
+
+def test_pcpvt():
+    # Test normal input
+    H, W = (224, 224)
+    temp = torch.randn((1, 3, H, W))
+    model = PCPVT(
+        embed_dims=[32, 64, 160, 256],
+        num_heads=[1, 2, 5, 8],
+        mlp_ratios=[8, 8, 4, 4],
+        qkv_bias=True,
+        depths=[3, 4, 6, 3],
+        sr_ratios=[8, 4, 2, 1],
+        norm_after_stage=False)
+    model.init_weights()
+    outs = model(temp)
+    assert outs[0].shape == (1, 32, H // 4, W // 4)
+    assert outs[1].shape == (1, 64, H // 8, W // 8)
+    assert outs[2].shape == (1, 160, H // 16, W // 16)
+    assert outs[3].shape == (1, 256, H // 32, W // 32)
+
+
+def test_svt():
+    # Test normal input
+    H, W = (224, 224)
+    temp = torch.randn((1, 3, H, W))
+    model = SVT(
+        embed_dims=[32, 64, 128],
+        num_heads=[1, 2, 4],
+        mlp_ratios=[4, 4, 4],
+        qkv_bias=False,
+        depths=[4, 4, 4],
+        windiow_sizes=[7, 7, 7],  # (sic: this is the constructor's spelling)
+        norm_after_stage=True)
+
+    model.init_weights()
+    outs = model(temp)
+    assert outs[0].shape == (1, 32, H // 4, W // 4)
+    assert outs[1].shape == (1, 64, H // 8, W // 8)
+    assert outs[2].shape == (1, 128, H // 16, W // 16)
+
+
+def test_svt_init():
+    path = 'PATH_THAT_DO_NOT_EXIST'
+    # Test all combinations of pretrained and init_cfg
+    # pretrained=None, init_cfg=None
+    model = SVT(pretrained=None, init_cfg=None)
+    assert model.init_cfg is None
+    model.init_weights()
+
+    # pretrained=None
+    # init_cfg loads pretrain from a non-existent file
+    model = SVT(
+        pretrained=None, init_cfg=dict(type='Pretrained', checkpoint=path))
+    assert model.init_cfg == dict(type='Pretrained', checkpoint=path)
+    # Test loading a checkpoint from a non-existent file
+    with pytest.raises(OSError):
+        model.init_weights()
+
+    # pretrained=None
+    # init_cfg=123, whose type is unsupported
+    model = SVT(pretrained=None, init_cfg=123)
+    with pytest.raises(TypeError):
+        model.init_weights()
+
+    # pretrained loads pretrain from a non-existent file
+    # init_cfg=None
+    model = SVT(pretrained=path, init_cfg=None)
+    assert model.init_cfg == dict(type='Pretrained', checkpoint=path)
+    # Test loading a checkpoint from a non-existent file
+    with pytest.raises(OSError):
+        model.init_weights()
+
+    # pretrained loads pretrain from a non-existent file
+    # init_cfg loads pretrain from a non-existent file
+    with pytest.raises(AssertionError):
+        model = SVT(
+            pretrained=path,
+            init_cfg=dict(type='Pretrained', checkpoint=path))
+    with pytest.raises(AssertionError):
+        model = SVT(pretrained=path, init_cfg=123)
+
+    # pretrained=123, whose type is unsupported
+    # init_cfg=None
+    with pytest.raises(TypeError):
+        model = SVT(pretrained=123, init_cfg=None)
+
+    # pretrained=123, whose type is unsupported
+    # init_cfg loads pretrain from a non-existent file
+    with pytest.raises(AssertionError):
+        model = SVT(
+            pretrained=123, init_cfg=dict(type='Pretrained', checkpoint=path))
+
+    # pretrained=123, whose type is unsupported
+    # init_cfg=123, whose type is unsupported
+    with pytest.raises(AssertionError):
+        model = SVT(pretrained=123, init_cfg=123)
+
+
+def test_pcpvt_init():
+    path = 'PATH_THAT_DO_NOT_EXIST'
+    # Test all combinations of pretrained and init_cfg
+    # pretrained=None, init_cfg=None
+    model = PCPVT(pretrained=None, init_cfg=None)
+    assert model.init_cfg is None
+    model.init_weights()
+
+    # pretrained=None
+    # init_cfg loads pretrain from a non-existent file
+    model = PCPVT(
+        pretrained=None, init_cfg=dict(type='Pretrained', checkpoint=path))
+    assert model.init_cfg == dict(type='Pretrained', checkpoint=path)
+    # Test loading a checkpoint from a non-existent file
+    with pytest.raises(OSError):
+        model.init_weights()
+
+    # pretrained=None
+    # init_cfg=123, whose type is unsupported
+    model = PCPVT(pretrained=None, init_cfg=123)
+    with pytest.raises(TypeError):
+        model.init_weights()
+
+    # pretrained loads pretrain from a non-existent file
+    # init_cfg=None
+    model = PCPVT(pretrained=path, init_cfg=None)
+    assert model.init_cfg == dict(type='Pretrained', checkpoint=path)
+    # Test loading a checkpoint from a non-existent file
+    with pytest.raises(OSError):
+        model.init_weights()
+
+    # pretrained loads pretrain from a non-existent file
+    # init_cfg loads pretrain from a non-existent file
+    with pytest.raises(AssertionError):
+        model = PCPVT(
+            pretrained=path, init_cfg=dict(type='Pretrained', checkpoint=path))
+    with pytest.raises(AssertionError):
+        model = PCPVT(pretrained=path, init_cfg=123)
+
+    # pretrained=123, whose type is unsupported
+    # init_cfg=None
+    with pytest.raises(TypeError):
+        model = PCPVT(pretrained=123, init_cfg=None)
+
+    # pretrained=123, whose type is unsupported
+    # init_cfg loads pretrain from a non-existent file
+    with pytest.raises(AssertionError):
+        model = PCPVT(
+            pretrained=123, init_cfg=dict(type='Pretrained', checkpoint=path))
+
+    # pretrained=123, whose type is unsupported
+    # init_cfg=123, whose type is unsupported
+    with pytest.raises(AssertionError):
+        model = PCPVT(pretrained=123, init_cfg=123)
+
+
+def test_locallygrouped_self_attention_module():
+    LSA = LocallyGroupedSelfAttention(embed_dims=32, window_size=3)
+    outs = LSA(torch.randn(1, 3136, 32), (56, 56))
+    assert outs.shape == torch.Size([1, 3136, 32])
+
+
+def test_conditional_position_encoding_module():
+    CPE = ConditionalPositionEncoding(in_channels=32, embed_dims=32, stride=2)
+    outs = CPE(torch.randn(1, 3136, 32), (56, 56))
+    assert outs.shape == torch.Size([1, 784, 32])
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_unet.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_unet.py
new file mode 100644
index 0000000..9beb727
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_unet.py
@@ -0,0 +1,822 @@
+# Copyright (c) OpenMMLab. All rights reserved.
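+# Editor's note: the effective downsample rate is the product over stages of
+# one halving per stage that either sets stride=2 or has downsamples=True;
+# inputs must be divisible by that product, which is what the assertion
+# cases below exercise.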
+import pytest +import torch +from mmcv.cnn import ConvModule + +from mmseg.models.backbones.unet import (BasicConvBlock, DeconvModule, + InterpConv, UNet, UpConvBlock) +from mmseg.ops import Upsample +from .utils import check_norm_state + + +def test_unet_basic_conv_block(): + with pytest.raises(AssertionError): + # Not implemented yet. + dcn = dict(type='DCN', deform_groups=1, fallback_on_stride=False) + BasicConvBlock(64, 64, dcn=dcn) + + with pytest.raises(AssertionError): + # Not implemented yet. + plugins = [ + dict( + cfg=dict(type='ContextBlock', ratio=1. / 16), + position='after_conv3') + ] + BasicConvBlock(64, 64, plugins=plugins) + + with pytest.raises(AssertionError): + # Not implemented yet + plugins = [ + dict( + cfg=dict( + type='GeneralizedAttention', + spatial_range=-1, + num_heads=8, + attention_type='0010', + kv_stride=2), + position='after_conv2') + ] + BasicConvBlock(64, 64, plugins=plugins) + + # test BasicConvBlock with checkpoint forward + block = BasicConvBlock(16, 16, with_cp=True) + assert block.with_cp + x = torch.randn(1, 16, 64, 64, requires_grad=True) + x_out = block(x) + assert x_out.shape == torch.Size([1, 16, 64, 64]) + + block = BasicConvBlock(16, 16, with_cp=False) + assert not block.with_cp + x = torch.randn(1, 16, 64, 64) + x_out = block(x) + assert x_out.shape == torch.Size([1, 16, 64, 64]) + + # test BasicConvBlock with stride convolution to downsample + block = BasicConvBlock(16, 16, stride=2) + x = torch.randn(1, 16, 64, 64) + x_out = block(x) + assert x_out.shape == torch.Size([1, 16, 32, 32]) + + # test BasicConvBlock structure and forward + block = BasicConvBlock(16, 64, num_convs=3, dilation=3) + assert block.convs[0].conv.in_channels == 16 + assert block.convs[0].conv.out_channels == 64 + assert block.convs[0].conv.kernel_size == (3, 3) + assert block.convs[0].conv.dilation == (1, 1) + assert block.convs[0].conv.padding == (1, 1) + + assert block.convs[1].conv.in_channels == 64 + assert block.convs[1].conv.out_channels == 64 + assert block.convs[1].conv.kernel_size == (3, 3) + assert block.convs[1].conv.dilation == (3, 3) + assert block.convs[1].conv.padding == (3, 3) + + assert block.convs[2].conv.in_channels == 64 + assert block.convs[2].conv.out_channels == 64 + assert block.convs[2].conv.kernel_size == (3, 3) + assert block.convs[2].conv.dilation == (3, 3) + assert block.convs[2].conv.padding == (3, 3) + + +def test_deconv_module(): + with pytest.raises(AssertionError): + # kernel_size should be greater than or equal to scale_factor and + # (kernel_size - scale_factor) should be even numbers + DeconvModule(64, 32, kernel_size=1, scale_factor=2) + + with pytest.raises(AssertionError): + # kernel_size should be greater than or equal to scale_factor and + # (kernel_size - scale_factor) should be even numbers + DeconvModule(64, 32, kernel_size=3, scale_factor=2) + + with pytest.raises(AssertionError): + # kernel_size should be greater than or equal to scale_factor and + # (kernel_size - scale_factor) should be even numbers + DeconvModule(64, 32, kernel_size=5, scale_factor=4) + + # test DeconvModule with checkpoint forward and upsample 2X. 
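+    # (Editor's inference from the constraints above: with
+    # stride=scale_factor and padding=(kernel_size - scale_factor) // 2 the
+    # transposed conv upsamples by exactly scale_factor, which is why the
+    # kernel/scale difference must be even.)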
+ block = DeconvModule(64, 32, with_cp=True) + assert block.with_cp + x = torch.randn(1, 64, 128, 128, requires_grad=True) + x_out = block(x) + assert x_out.shape == torch.Size([1, 32, 256, 256]) + + block = DeconvModule(64, 32, with_cp=False) + assert not block.with_cp + x = torch.randn(1, 64, 128, 128) + x_out = block(x) + assert x_out.shape == torch.Size([1, 32, 256, 256]) + + # test DeconvModule with different kernel size for upsample 2X. + x = torch.randn(1, 64, 64, 64) + block = DeconvModule(64, 32, kernel_size=2, scale_factor=2) + x_out = block(x) + assert x_out.shape == torch.Size([1, 32, 128, 128]) + + block = DeconvModule(64, 32, kernel_size=6, scale_factor=2) + x_out = block(x) + assert x_out.shape == torch.Size([1, 32, 128, 128]) + + # test DeconvModule with different kernel size for upsample 4X. + x = torch.randn(1, 64, 64, 64) + block = DeconvModule(64, 32, kernel_size=4, scale_factor=4) + x_out = block(x) + assert x_out.shape == torch.Size([1, 32, 256, 256]) + + block = DeconvModule(64, 32, kernel_size=6, scale_factor=4) + x_out = block(x) + assert x_out.shape == torch.Size([1, 32, 256, 256]) + + +def test_interp_conv(): + # test InterpConv with checkpoint forward and upsample 2X. + block = InterpConv(64, 32, with_cp=True) + assert block.with_cp + x = torch.randn(1, 64, 128, 128, requires_grad=True) + x_out = block(x) + assert x_out.shape == torch.Size([1, 32, 256, 256]) + + block = InterpConv(64, 32, with_cp=False) + assert not block.with_cp + x = torch.randn(1, 64, 128, 128) + x_out = block(x) + assert x_out.shape == torch.Size([1, 32, 256, 256]) + + # test InterpConv with conv_first=False for upsample 2X. + block = InterpConv(64, 32, conv_first=False) + x = torch.randn(1, 64, 128, 128) + x_out = block(x) + assert isinstance(block.interp_upsample[0], Upsample) + assert isinstance(block.interp_upsample[1], ConvModule) + assert x_out.shape == torch.Size([1, 32, 256, 256]) + + # test InterpConv with conv_first=True for upsample 2X. + block = InterpConv(64, 32, conv_first=True) + x = torch.randn(1, 64, 128, 128) + x_out = block(x) + assert isinstance(block.interp_upsample[0], ConvModule) + assert isinstance(block.interp_upsample[1], Upsample) + assert x_out.shape == torch.Size([1, 32, 256, 256]) + + # test InterpConv with bilinear upsample for upsample 2X. + block = InterpConv( + 64, + 32, + conv_first=False, + upsample_cfg=dict( + scale_factor=2, mode='bilinear', align_corners=False)) + x = torch.randn(1, 64, 128, 128) + x_out = block(x) + assert isinstance(block.interp_upsample[0], Upsample) + assert isinstance(block.interp_upsample[1], ConvModule) + assert x_out.shape == torch.Size([1, 32, 256, 256]) + assert block.interp_upsample[0].mode == 'bilinear' + + # test InterpConv with nearest upsample for upsample 2X. + block = InterpConv( + 64, + 32, + conv_first=False, + upsample_cfg=dict(scale_factor=2, mode='nearest')) + x = torch.randn(1, 64, 128, 128) + x_out = block(x) + assert isinstance(block.interp_upsample[0], Upsample) + assert isinstance(block.interp_upsample[1], ConvModule) + assert x_out.shape == torch.Size([1, 32, 256, 256]) + assert block.interp_upsample[0].mode == 'nearest' + + +def test_up_conv_block(): + with pytest.raises(AssertionError): + # Not implemented yet. + dcn = dict(type='DCN', deform_groups=1, fallback_on_stride=False) + UpConvBlock(BasicConvBlock, 64, 32, 32, dcn=dcn) + + with pytest.raises(AssertionError): + # Not implemented yet. + plugins = [ + dict( + cfg=dict(type='ContextBlock', ratio=1. 
/ 16), + position='after_conv3') + ] + UpConvBlock(BasicConvBlock, 64, 32, 32, plugins=plugins) + + with pytest.raises(AssertionError): + # Not implemented yet + plugins = [ + dict( + cfg=dict( + type='GeneralizedAttention', + spatial_range=-1, + num_heads=8, + attention_type='0010', + kv_stride=2), + position='after_conv2') + ] + UpConvBlock(BasicConvBlock, 64, 32, 32, plugins=plugins) + + # test UpConvBlock with checkpoint forward and upsample 2X. + block = UpConvBlock(BasicConvBlock, 64, 32, 32, with_cp=True) + skip_x = torch.randn(1, 32, 256, 256, requires_grad=True) + x = torch.randn(1, 64, 128, 128, requires_grad=True) + x_out = block(skip_x, x) + assert x_out.shape == torch.Size([1, 32, 256, 256]) + + # test UpConvBlock with upsample=True for upsample 2X. The spatial size of + # skip_x is 2X larger than x. + block = UpConvBlock( + BasicConvBlock, 64, 32, 32, upsample_cfg=dict(type='InterpConv')) + skip_x = torch.randn(1, 32, 256, 256) + x = torch.randn(1, 64, 128, 128) + x_out = block(skip_x, x) + assert x_out.shape == torch.Size([1, 32, 256, 256]) + + # test UpConvBlock with upsample=False for upsample 2X. The spatial size of + # skip_x is the same as that of x. + block = UpConvBlock(BasicConvBlock, 64, 32, 32, upsample_cfg=None) + skip_x = torch.randn(1, 32, 256, 256) + x = torch.randn(1, 64, 256, 256) + x_out = block(skip_x, x) + assert x_out.shape == torch.Size([1, 32, 256, 256]) + + # test UpConvBlock with different upsample method for upsample 2X. + # The upsample method is interpolation upsample (bilinear or nearest). + block = UpConvBlock( + BasicConvBlock, + 64, + 32, + 32, + upsample_cfg=dict( + type='InterpConv', + upsample_cfg=dict( + scale_factor=2, mode='bilinear', align_corners=False))) + skip_x = torch.randn(1, 32, 256, 256) + x = torch.randn(1, 64, 128, 128) + x_out = block(skip_x, x) + assert x_out.shape == torch.Size([1, 32, 256, 256]) + + # test UpConvBlock with different upsample method for upsample 2X. + # The upsample method is deconvolution upsample. 
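+    # (kernel_size=4 with scale_factor=2 below satisfies the even-difference
+    # constraint exercised in test_deconv_module above.)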
+ block = UpConvBlock( + BasicConvBlock, + 64, + 32, + 32, + upsample_cfg=dict(type='DeconvModule', kernel_size=4, scale_factor=2)) + skip_x = torch.randn(1, 32, 256, 256) + x = torch.randn(1, 64, 128, 128) + x_out = block(skip_x, x) + assert x_out.shape == torch.Size([1, 32, 256, 256]) + + # test BasicConvBlock structure and forward + block = UpConvBlock( + conv_block=BasicConvBlock, + in_channels=64, + skip_channels=32, + out_channels=32, + num_convs=3, + dilation=3, + upsample_cfg=dict( + type='InterpConv', + upsample_cfg=dict( + scale_factor=2, mode='bilinear', align_corners=False))) + skip_x = torch.randn(1, 32, 256, 256) + x = torch.randn(1, 64, 128, 128) + x_out = block(skip_x, x) + assert x_out.shape == torch.Size([1, 32, 256, 256]) + + assert block.conv_block.convs[0].conv.in_channels == 64 + assert block.conv_block.convs[0].conv.out_channels == 32 + assert block.conv_block.convs[0].conv.kernel_size == (3, 3) + assert block.conv_block.convs[0].conv.dilation == (1, 1) + assert block.conv_block.convs[0].conv.padding == (1, 1) + + assert block.conv_block.convs[1].conv.in_channels == 32 + assert block.conv_block.convs[1].conv.out_channels == 32 + assert block.conv_block.convs[1].conv.kernel_size == (3, 3) + assert block.conv_block.convs[1].conv.dilation == (3, 3) + assert block.conv_block.convs[1].conv.padding == (3, 3) + + assert block.conv_block.convs[2].conv.in_channels == 32 + assert block.conv_block.convs[2].conv.out_channels == 32 + assert block.conv_block.convs[2].conv.kernel_size == (3, 3) + assert block.conv_block.convs[2].conv.dilation == (3, 3) + assert block.conv_block.convs[2].conv.padding == (3, 3) + + assert block.upsample.interp_upsample[1].conv.in_channels == 64 + assert block.upsample.interp_upsample[1].conv.out_channels == 32 + assert block.upsample.interp_upsample[1].conv.kernel_size == (1, 1) + assert block.upsample.interp_upsample[1].conv.dilation == (1, 1) + assert block.upsample.interp_upsample[1].conv.padding == (0, 0) + + +def test_unet(): + with pytest.raises(AssertionError): + # Not implemented yet. + dcn = dict(type='DCN', deform_groups=1, fallback_on_stride=False) + UNet(3, 64, 5, dcn=dcn) + + with pytest.raises(AssertionError): + # Not implemented yet. + plugins = [ + dict( + cfg=dict(type='ContextBlock', ratio=1. / 16), + position='after_conv3') + ] + UNet(3, 64, 5, plugins=plugins) + + with pytest.raises(AssertionError): + # Not implemented yet + plugins = [ + dict( + cfg=dict( + type='GeneralizedAttention', + spatial_range=-1, + num_heads=8, + attention_type='0010', + kv_stride=2), + position='after_conv2') + ] + UNet(3, 64, 5, plugins=plugins) + + with pytest.raises(AssertionError): + # Check whether the input image size can be divisible by the whole + # downsample rate of the encoder. The whole downsample rate of this + # case is 8. + unet = UNet( + in_channels=3, + base_channels=4, + num_stages=4, + strides=(1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2), + dec_num_convs=(2, 2, 2), + downsamples=(True, True, True), + enc_dilations=(1, 1, 1, 1), + dec_dilations=(1, 1, 1)) + x = torch.randn(2, 3, 65, 65) + unet(x) + + with pytest.raises(AssertionError): + # Check whether the input image size can be divisible by the whole + # downsample rate of the encoder. The whole downsample rate of this + # case is 16. 
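+        # (65 is not divisible by 16, so the shape check is expected to
+        # raise.)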
+ unet = UNet( + in_channels=3, + base_channels=4, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1)) + x = torch.randn(2, 3, 65, 65) + unet(x) + + with pytest.raises(AssertionError): + # Check whether the input image size can be divisible by the whole + # downsample rate of the encoder. The whole downsample rate of this + # case is 8. + unet = UNet( + in_channels=3, + base_channels=4, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, False), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1)) + x = torch.randn(2, 3, 65, 65) + unet(x) + + with pytest.raises(AssertionError): + # Check whether the input image size can be divisible by the whole + # downsample rate of the encoder. The whole downsample rate of this + # case is 8. + unet = UNet( + in_channels=3, + base_channels=4, + num_stages=5, + strides=(1, 2, 2, 2, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, False), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1)) + x = torch.randn(2, 3, 65, 65) + unet(x) + + with pytest.raises(AssertionError): + # Check whether the input image size can be divisible by the whole + # downsample rate of the encoder. The whole downsample rate of this + # case is 32. + unet = UNet( + in_channels=3, + base_channels=4, + num_stages=6, + strides=(1, 1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2, 2), + downsamples=(True, True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1, 1)) + x = torch.randn(2, 3, 65, 65) + unet(x) + + with pytest.raises(AssertionError): + # Check if num_stages matches strides, len(strides)=num_stages + unet = UNet( + in_channels=3, + base_channels=4, + num_stages=5, + strides=(1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1)) + x = torch.randn(2, 3, 64, 64) + unet(x) + + with pytest.raises(AssertionError): + # Check if num_stages matches strides, len(enc_num_convs)=num_stages + unet = UNet( + in_channels=3, + base_channels=4, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1)) + x = torch.randn(2, 3, 64, 64) + unet(x) + + with pytest.raises(AssertionError): + # Check if num_stages matches strides, len(dec_num_convs)=num_stages-1 + unet = UNet( + in_channels=3, + base_channels=4, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1)) + x = torch.randn(2, 3, 64, 64) + unet(x) + + with pytest.raises(AssertionError): + # Check if num_stages matches strides, len(downsamples)=num_stages-1 + unet = UNet( + in_channels=3, + base_channels=4, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1)) + x = torch.randn(2, 3, 64, 64) + unet(x) + + with pytest.raises(AssertionError): + # Check if num_stages matches strides, 
len(enc_dilations)=num_stages + unet = UNet( + in_channels=3, + base_channels=4, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1)) + x = torch.randn(2, 3, 64, 64) + unet(x) + + with pytest.raises(AssertionError): + # Check if num_stages matches strides, len(dec_dilations)=num_stages-1 + unet = UNet( + in_channels=3, + base_channels=4, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1, 1)) + x = torch.randn(2, 3, 64, 64) + unet(x) + + # test UNet norm_eval=True + unet = UNet( + in_channels=3, + base_channels=4, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + norm_eval=True) + unet.train() + assert check_norm_state(unet.modules(), False) + + # test UNet norm_eval=False + unet = UNet( + in_channels=3, + base_channels=4, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + norm_eval=False) + unet.train() + assert check_norm_state(unet.modules(), True) + + # test UNet forward and outputs. The whole downsample rate is 16. + unet = UNet( + in_channels=3, + base_channels=4, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1)) + + x = torch.randn(2, 3, 128, 128) + x_outs = unet(x) + assert x_outs[0].shape == torch.Size([2, 64, 8, 8]) + assert x_outs[1].shape == torch.Size([2, 32, 16, 16]) + assert x_outs[2].shape == torch.Size([2, 16, 32, 32]) + assert x_outs[3].shape == torch.Size([2, 8, 64, 64]) + assert x_outs[4].shape == torch.Size([2, 4, 128, 128]) + + # test UNet forward and outputs. The whole downsample rate is 8. + unet = UNet( + in_channels=3, + base_channels=4, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, False), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1)) + + x = torch.randn(2, 3, 128, 128) + x_outs = unet(x) + assert x_outs[0].shape == torch.Size([2, 64, 16, 16]) + assert x_outs[1].shape == torch.Size([2, 32, 16, 16]) + assert x_outs[2].shape == torch.Size([2, 16, 32, 32]) + assert x_outs[3].shape == torch.Size([2, 8, 64, 64]) + assert x_outs[4].shape == torch.Size([2, 4, 128, 128]) + + # test UNet forward and outputs. The whole downsample rate is 8. + unet = UNet( + in_channels=3, + base_channels=4, + num_stages=5, + strides=(1, 2, 2, 2, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, False), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1)) + + x = torch.randn(2, 3, 128, 128) + x_outs = unet(x) + assert x_outs[0].shape == torch.Size([2, 64, 16, 16]) + assert x_outs[1].shape == torch.Size([2, 32, 16, 16]) + assert x_outs[2].shape == torch.Size([2, 16, 32, 32]) + assert x_outs[3].shape == torch.Size([2, 8, 64, 64]) + assert x_outs[4].shape == torch.Size([2, 4, 128, 128]) + + # test UNet forward and outputs. 
The whole downsample rate is 4. + unet = UNet( + in_channels=3, + base_channels=4, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, False, False), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1)) + + x = torch.randn(2, 3, 128, 128) + x_outs = unet(x) + assert x_outs[0].shape == torch.Size([2, 64, 32, 32]) + assert x_outs[1].shape == torch.Size([2, 32, 32, 32]) + assert x_outs[2].shape == torch.Size([2, 16, 32, 32]) + assert x_outs[3].shape == torch.Size([2, 8, 64, 64]) + assert x_outs[4].shape == torch.Size([2, 4, 128, 128]) + + # test UNet forward and outputs. The whole downsample rate is 4. + unet = UNet( + in_channels=3, + base_channels=4, + num_stages=5, + strides=(1, 2, 2, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, False, False), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1)) + + x = torch.randn(2, 3, 128, 128) + x_outs = unet(x) + assert x_outs[0].shape == torch.Size([2, 64, 32, 32]) + assert x_outs[1].shape == torch.Size([2, 32, 32, 32]) + assert x_outs[2].shape == torch.Size([2, 16, 32, 32]) + assert x_outs[3].shape == torch.Size([2, 8, 64, 64]) + assert x_outs[4].shape == torch.Size([2, 4, 128, 128]) + + # test UNet forward and outputs. The whole downsample rate is 8. + unet = UNet( + in_channels=3, + base_channels=4, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, False), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1)) + + x = torch.randn(2, 3, 128, 128) + x_outs = unet(x) + assert x_outs[0].shape == torch.Size([2, 64, 16, 16]) + assert x_outs[1].shape == torch.Size([2, 32, 16, 16]) + assert x_outs[2].shape == torch.Size([2, 16, 32, 32]) + assert x_outs[3].shape == torch.Size([2, 8, 64, 64]) + assert x_outs[4].shape == torch.Size([2, 4, 128, 128]) + + # test UNet forward and outputs. The whole downsample rate is 4. + unet = UNet( + in_channels=3, + base_channels=4, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, False, False), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1)) + + x = torch.randn(2, 3, 128, 128) + x_outs = unet(x) + assert x_outs[0].shape == torch.Size([2, 64, 32, 32]) + assert x_outs[1].shape == torch.Size([2, 32, 32, 32]) + assert x_outs[2].shape == torch.Size([2, 16, 32, 32]) + assert x_outs[3].shape == torch.Size([2, 8, 64, 64]) + assert x_outs[4].shape == torch.Size([2, 4, 128, 128]) + + # test UNet forward and outputs. The whole downsample rate is 2. + unet = UNet( + in_channels=3, + base_channels=4, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, False, False, False), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1)) + + x = torch.randn(2, 3, 128, 128) + x_outs = unet(x) + assert x_outs[0].shape == torch.Size([2, 64, 64, 64]) + assert x_outs[1].shape == torch.Size([2, 32, 64, 64]) + assert x_outs[2].shape == torch.Size([2, 16, 64, 64]) + assert x_outs[3].shape == torch.Size([2, 8, 64, 64]) + assert x_outs[4].shape == torch.Size([2, 4, 128, 128]) + + # test UNet forward and outputs. The whole downsample rate is 1. 
+ unet = UNet( + in_channels=3, + base_channels=4, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(False, False, False, False), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1)) + + x = torch.randn(2, 3, 128, 128) + x_outs = unet(x) + assert x_outs[0].shape == torch.Size([2, 64, 128, 128]) + assert x_outs[1].shape == torch.Size([2, 32, 128, 128]) + assert x_outs[2].shape == torch.Size([2, 16, 128, 128]) + assert x_outs[3].shape == torch.Size([2, 8, 128, 128]) + assert x_outs[4].shape == torch.Size([2, 4, 128, 128]) + + # test UNet forward and outputs. The whole downsample rate is 16. + unet = UNet( + in_channels=3, + base_channels=4, + num_stages=5, + strides=(1, 2, 2, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1)) + x = torch.randn(2, 3, 128, 128) + x_outs = unet(x) + assert x_outs[0].shape == torch.Size([2, 64, 8, 8]) + assert x_outs[1].shape == torch.Size([2, 32, 16, 16]) + assert x_outs[2].shape == torch.Size([2, 16, 32, 32]) + assert x_outs[3].shape == torch.Size([2, 8, 64, 64]) + assert x_outs[4].shape == torch.Size([2, 4, 128, 128]) + + # test UNet forward and outputs. The whole downsample rate is 8. + unet = UNet( + in_channels=3, + base_channels=4, + num_stages=5, + strides=(1, 2, 2, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, False), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1)) + x = torch.randn(2, 3, 128, 128) + x_outs = unet(x) + assert x_outs[0].shape == torch.Size([2, 64, 16, 16]) + assert x_outs[1].shape == torch.Size([2, 32, 16, 16]) + assert x_outs[2].shape == torch.Size([2, 16, 32, 32]) + assert x_outs[3].shape == torch.Size([2, 8, 64, 64]) + assert x_outs[4].shape == torch.Size([2, 4, 128, 128]) + + # test UNet forward and outputs. The whole downsample rate is 8. + unet = UNet( + in_channels=3, + base_channels=4, + num_stages=5, + strides=(1, 2, 2, 2, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, False), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1)) + x = torch.randn(2, 3, 128, 128) + x_outs = unet(x) + assert x_outs[0].shape == torch.Size([2, 64, 16, 16]) + assert x_outs[1].shape == torch.Size([2, 32, 16, 16]) + assert x_outs[2].shape == torch.Size([2, 16, 32, 32]) + assert x_outs[3].shape == torch.Size([2, 8, 64, 64]) + assert x_outs[4].shape == torch.Size([2, 4, 128, 128]) + + # test UNet forward and outputs. The whole downsample rate is 4. + unet = UNet( + in_channels=3, + base_channels=4, + num_stages=5, + strides=(1, 2, 2, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, False, False), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1)) + x = torch.randn(2, 3, 128, 128) + x_outs = unet(x) + assert x_outs[0].shape == torch.Size([2, 64, 32, 32]) + assert x_outs[1].shape == torch.Size([2, 32, 32, 32]) + assert x_outs[2].shape == torch.Size([2, 16, 32, 32]) + assert x_outs[3].shape == torch.Size([2, 8, 64, 64]) + assert x_outs[4].shape == torch.Size([2, 4, 128, 128]) + + # test UNet init_weights method. 
+ unet = UNet( + in_channels=3, + base_channels=4, + num_stages=5, + strides=(1, 2, 2, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, False, False), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + pretrained=None) + unet.init_weights() + x = torch.randn(2, 3, 128, 128) + x_outs = unet(x) + assert x_outs[0].shape == torch.Size([2, 64, 32, 32]) + assert x_outs[1].shape == torch.Size([2, 32, 32, 32]) + assert x_outs[2].shape == torch.Size([2, 16, 32, 32]) + assert x_outs[3].shape == torch.Size([2, 8, 64, 64]) + assert x_outs[4].shape == torch.Size([2, 4, 128, 128]) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_vit.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_vit.py new file mode 100644 index 0000000..0d1ba70 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/test_vit.py @@ -0,0 +1,185 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pytest +import torch + +from mmseg.models.backbones.vit import (TransformerEncoderLayer, + VisionTransformer) +from .utils import check_norm_state + + +def test_vit_backbone(): + with pytest.raises(TypeError): + # pretrained must be a string path + model = VisionTransformer() + model.init_weights(pretrained=0) + + with pytest.raises(TypeError): + # img_size must be an int or tuple + model = VisionTransformer(img_size=512.0) + + with pytest.raises(TypeError): + # out_indices must be an int, list or tuple + model = VisionTransformer(out_indices=1.) + + with pytest.raises(TypeError): + # test the resize_pos_embed function + x = torch.randn(1, 196) + VisionTransformer.resize_pos_embed(x, 512, 512, 224, 224, 'bilinear') + + with pytest.raises(AssertionError): + # The img_size tuple must have fewer than 3 elements. + VisionTransformer(img_size=(224, 224, 224)) + + with pytest.raises(TypeError): + # pretrained must be None or a str.
+ VisionTransformer(pretrained=123) + + with pytest.raises(AssertionError): + # with_cls_token must be True when output_cls_token == True + VisionTransformer(with_cls_token=False, output_cls_token=True) + + # Test img_size as a single-element tuple + imgs = torch.randn(1, 3, 224, 224) + model = VisionTransformer(img_size=(224, )) + model.init_weights() + model(imgs) + + # Test img_size as a two-element tuple + imgs = torch.randn(1, 3, 224, 224) + model = VisionTransformer(img_size=(224, 224)) + model(imgs) + + # Test norm_eval = True + model = VisionTransformer(norm_eval=True) + model.train() + + # Test ViT backbone with input size of 224 and patch size of 16 + model = VisionTransformer() + model.init_weights() + model.train() + + assert check_norm_state(model.modules(), True) + + # Test normal size input image + imgs = torch.randn(1, 3, 224, 224) + feat = model(imgs) + assert feat[-1].shape == (1, 768, 14, 14) + + # Test large size input image + imgs = torch.randn(1, 3, 256, 256) + feat = model(imgs) + assert feat[-1].shape == (1, 768, 16, 16) + + # Test small size input image + imgs = torch.randn(1, 3, 32, 32) + feat = model(imgs) + assert feat[-1].shape == (1, 768, 2, 2) + + imgs = torch.randn(1, 3, 224, 224) + feat = model(imgs) + assert feat[-1].shape == (1, 768, 14, 14) + + # Test unbalanced size input image + imgs = torch.randn(1, 3, 112, 224) + feat = model(imgs) + assert feat[-1].shape == (1, 768, 7, 14) + + # Test irregular input image + imgs = torch.randn(1, 3, 234, 345) + feat = model(imgs) + assert feat[-1].shape == (1, 768, 15, 22) + + # Test with_cp=True + model = VisionTransformer(with_cp=True) + imgs = torch.randn(1, 3, 224, 224) + feat = model(imgs) + assert feat[-1].shape == (1, 768, 14, 14) + + # Test with_cls_token=False + model = VisionTransformer(with_cls_token=False) + imgs = torch.randn(1, 3, 224, 224) + feat = model(imgs) + assert feat[-1].shape == (1, 768, 14, 14) + + # Test final norm + model = VisionTransformer(final_norm=True) + imgs = torch.randn(1, 3, 224, 224) + feat = model(imgs) + assert feat[-1].shape == (1, 768, 14, 14) + + # Test patch norm + model = VisionTransformer(patch_norm=True) + imgs = torch.randn(1, 3, 224, 224) + feat = model(imgs) + assert feat[-1].shape == (1, 768, 14, 14) + + # Test output_cls_token + model = VisionTransformer(with_cls_token=True, output_cls_token=True) + imgs = torch.randn(1, 3, 224, 224) + feat = model(imgs) + assert feat[0][0].shape == (1, 768, 14, 14) + assert feat[0][1].shape == (1, 768) + + # Test TransformerEncoderLayer with checkpoint forward + block = TransformerEncoderLayer( + embed_dims=64, num_heads=4, feedforward_channels=256, with_cp=True) + assert block.with_cp + x = torch.randn(1, 56 * 56, 64) + x_out = block(x) + assert x_out.shape == torch.Size([1, 56 * 56, 64]) + + +def test_vit_init(): + path = 'PATH_THAT_DO_NOT_EXIST' + # Test all combinations of pretrained and init_cfg + # pretrained=None, init_cfg=None + model = VisionTransformer(pretrained=None, init_cfg=None) + assert model.init_cfg is None + model.init_weights() + + # pretrained=None + # init_cfg loads pretrained weights from a non-existent file + model = VisionTransformer( + pretrained=None, init_cfg=dict(type='Pretrained', checkpoint=path)) + assert model.init_cfg == dict(type='Pretrained', checkpoint=path) + # Test loading a checkpoint from a non-existent file + with pytest.raises(OSError): + model.init_weights() + + # pretrained=None + # init_cfg=123, whose type is unsupported + model = VisionTransformer(pretrained=None, init_cfg=123) + with pytest.raises(TypeError): + model.init_weights() + + # pretrained loads a checkpoint from a non-existent file + # init_cfg=None + model = VisionTransformer(pretrained=path, init_cfg=None) + assert model.init_cfg == dict(type='Pretrained', checkpoint=path) + # Test loading a checkpoint from a non-existent file + with pytest.raises(OSError): + model.init_weights() + + # pretrained loads a checkpoint from a non-existent file + # init_cfg loads pretrained weights from a non-existent file + with pytest.raises(AssertionError): + model = VisionTransformer( + pretrained=path, init_cfg=dict(type='Pretrained', checkpoint=path)) + with pytest.raises(AssertionError): + model = VisionTransformer(pretrained=path, init_cfg=123) + + # pretrained=123, whose type is unsupported + # init_cfg=None + with pytest.raises(TypeError): + model = VisionTransformer(pretrained=123, init_cfg=None) + + # pretrained=123, whose type is unsupported + # init_cfg loads pretrained weights from a non-existent file + with pytest.raises(AssertionError): + model = VisionTransformer( + pretrained=123, init_cfg=dict(type='Pretrained', checkpoint=path)) + + # pretrained=123, whose type is unsupported + # init_cfg=123, whose type is unsupported + with pytest.raises(AssertionError): + model = VisionTransformer(pretrained=123, init_cfg=123) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/utils.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/utils.py new file mode 100644 index 0000000..54b6404 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_backbones/utils.py @@ -0,0 +1,43 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch +from torch.nn.modules import GroupNorm +from torch.nn.modules.batchnorm import _BatchNorm + +from mmseg.models.backbones.resnet import BasicBlock, Bottleneck +from mmseg.models.backbones.resnext import Bottleneck as BottleneckX + + +def is_block(modules): + """Check whether the module is a ResNet building block.""" + if isinstance(modules, (BasicBlock, Bottleneck, BottleneckX)): + return True + return False + + +def is_norm(modules): + """Check whether the module is a normalization layer.""" + if isinstance(modules, (GroupNorm, _BatchNorm)): + return True + return False + + +def all_zeros(modules): + """Check whether the weight (and bias, if present) are all zeros.""" + weight_zero = torch.allclose(modules.weight.data, + torch.zeros_like(modules.weight.data)) + if hasattr(modules, 'bias'): + bias_zero = torch.allclose(modules.bias.data, + torch.zeros_like(modules.bias.data)) + else: + bias_zero = True + + return weight_zero and bias_zero + + +def check_norm_state(modules, train_state): + """Check whether each norm layer is in the expected train state.""" + for mod in modules: + if isinstance(mod, _BatchNorm): + if mod.training != train_state: + return False + return True diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_forward.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_forward.py new file mode 100644 index 0000000..ee707b3 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_forward.py @@ -0,0 +1,235 @@ +# Copyright (c) OpenMMLab. All rights reserved.
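+# The forward tests below build each segmentor from its bundled config via +# build_segmentor, then run one training forward pass (return_loss=True) and +# one inference pass on tiny random inputs; torch.distributed.get_world_size +# is patched so SyncBN-based configs can run in a single process.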
+"""pytest tests/test_forward.py.""" +import copy +from os.path import dirname, exists, join +from unittest.mock import patch + +import numpy as np +import pytest +import torch +import torch.nn as nn +from mmcv.cnn.utils import revert_sync_batchnorm + + +def _demo_mm_inputs(input_shape=(2, 3, 8, 16), num_classes=10): + """Create a superset of inputs needed to run test or train batches. + + Args: + input_shape (tuple): + input batch dimensions + + num_classes (int): + number of semantic classes + """ + (N, C, H, W) = input_shape + + rng = np.random.RandomState(0) + + imgs = rng.rand(*input_shape) + segs = rng.randint( + low=0, high=num_classes - 1, size=(N, 1, H, W)).astype(np.uint8) + + img_metas = [{ + 'img_shape': (H, W, C), + 'ori_shape': (H, W, C), + 'pad_shape': (H, W, C), + 'filename': '.png', + 'scale_factor': 1.0, + 'flip': False, + 'flip_direction': 'horizontal' + } for _ in range(N)] + + mm_inputs = { + 'imgs': torch.FloatTensor(imgs), + 'img_metas': img_metas, + 'gt_semantic_seg': torch.LongTensor(segs) + } + return mm_inputs + + +def _get_config_directory(): + """Find the predefined segmentor config directory.""" + try: + # Assume we are running in the source mmsegmentation repo + repo_dpath = dirname(dirname(dirname(__file__))) + except NameError: + # For IPython development when this __file__ is not defined + import mmseg + repo_dpath = dirname(dirname(dirname(mmseg.__file__))) + config_dpath = join(repo_dpath, 'configs') + if not exists(config_dpath): + raise Exception('Cannot find config path') + return config_dpath + + +def _get_config_module(fname): + """Load a configuration as a python module.""" + from mmcv import Config + config_dpath = _get_config_directory() + config_fpath = join(config_dpath, fname) + config_mod = Config.fromfile(config_fpath) + return config_mod + + +def _get_segmentor_cfg(fname): + """Grab configs necessary to create a segmentor. + + These are deep copied to allow for safe modification of parameters without + influencing other tests. 
+ """ + config = _get_config_module(fname) + model = copy.deepcopy(config.model) + return model + + +def test_pspnet_forward(): + _test_encoder_decoder_forward( + 'pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py') + + +def test_fcn_forward(): + _test_encoder_decoder_forward('fcn/fcn_r50-d8_512x1024_40k_cityscapes.py') + + +def test_deeplabv3_forward(): + _test_encoder_decoder_forward( + 'deeplabv3/deeplabv3_r50-d8_512x1024_40k_cityscapes.py') + + +def test_deeplabv3plus_forward(): + _test_encoder_decoder_forward( + 'deeplabv3plus/deeplabv3plus_r50-d8_512x1024_40k_cityscapes.py') + + +def test_gcnet_forward(): + _test_encoder_decoder_forward( + 'gcnet/gcnet_r50-d8_512x1024_40k_cityscapes.py') + + +def test_ann_forward(): + _test_encoder_decoder_forward('ann/ann_r50-d8_512x1024_40k_cityscapes.py') + + +def test_ccnet_forward(): + if not torch.cuda.is_available(): + pytest.skip('CCNet requires CUDA') + _test_encoder_decoder_forward( + 'ccnet/ccnet_r50-d8_512x1024_40k_cityscapes.py') + + +def test_danet_forward(): + _test_encoder_decoder_forward( + 'danet/danet_r50-d8_512x1024_40k_cityscapes.py') + + +def test_nonlocal_net_forward(): + _test_encoder_decoder_forward( + 'nonlocal_net/nonlocal_r50-d8_512x1024_40k_cityscapes.py') + + +def test_upernet_forward(): + _test_encoder_decoder_forward( + 'upernet/upernet_r50_512x1024_40k_cityscapes.py') + + +def test_hrnet_forward(): + _test_encoder_decoder_forward('hrnet/fcn_hr18s_512x1024_40k_cityscapes.py') + + +def test_ocrnet_forward(): + _test_encoder_decoder_forward( + 'ocrnet/ocrnet_hr18s_512x1024_40k_cityscapes.py') + + +def test_psanet_forward(): + _test_encoder_decoder_forward( + 'psanet/psanet_r50-d8_512x1024_40k_cityscapes.py') + + +def test_encnet_forward(): + _test_encoder_decoder_forward( + 'encnet/encnet_r50-d8_512x1024_40k_cityscapes.py') + + +def test_sem_fpn_forward(): + _test_encoder_decoder_forward('sem_fpn/fpn_r50_512x1024_80k_cityscapes.py') + + +def test_point_rend_forward(): + _test_encoder_decoder_forward( + 'point_rend/pointrend_r50_512x1024_80k_cityscapes.py') + + +def test_mobilenet_v2_forward(): + _test_encoder_decoder_forward( + 'mobilenet_v2/pspnet_m-v2-d8_512x1024_80k_cityscapes.py') + + +def test_dnlnet_forward(): + _test_encoder_decoder_forward( + 'dnlnet/dnl_r50-d8_512x1024_40k_cityscapes.py') + + +def test_emanet_forward(): + _test_encoder_decoder_forward( + 'emanet/emanet_r50-d8_512x1024_80k_cityscapes.py') + + +def test_isanet_forward(): + _test_encoder_decoder_forward( + 'isanet/isanet_r50-d8_512x1024_40k_cityscapes.py') + + +def get_world_size(process_group): + + return 1 + + +def _check_input_dim(self, inputs): + pass + + +@patch('torch.nn.modules.batchnorm._BatchNorm._check_input_dim', + _check_input_dim) +@patch('torch.distributed.get_world_size', get_world_size) +def _test_encoder_decoder_forward(cfg_file): + model = _get_segmentor_cfg(cfg_file) + model['pretrained'] = None + model['test_cfg']['mode'] = 'whole' + + from mmseg.models import build_segmentor + segmentor = build_segmentor(model) + segmentor.init_weights() + + if isinstance(segmentor.decode_head, nn.ModuleList): + num_classes = segmentor.decode_head[-1].num_classes + else: + num_classes = segmentor.decode_head.num_classes + # batch_size=2 for BatchNorm + input_shape = (2, 3, 32, 32) + mm_inputs = _demo_mm_inputs(input_shape, num_classes=num_classes) + + imgs = mm_inputs.pop('imgs') + img_metas = mm_inputs.pop('img_metas') + gt_semantic_seg = mm_inputs['gt_semantic_seg'] + + # convert to cuda Tensor if applicable + if torch.cuda.is_available(): + 
segmentor = segmentor.cuda() + imgs = imgs.cuda() + gt_semantic_seg = gt_semantic_seg.cuda() + else: + segmentor = revert_sync_batchnorm(segmentor) + + # Test forward train + losses = segmentor.forward( + imgs, img_metas, gt_semantic_seg=gt_semantic_seg, return_loss=True) + assert isinstance(losses, dict) + + # Test forward test + with torch.no_grad(): + segmentor.eval() + # pack into lists + img_list = [img[None, :] for img in imgs] + img_meta_list = [[img_meta] for img_meta in img_metas] + segmentor.forward(img_list, img_meta_list, return_loss=False) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/__init__.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/__init__.py new file mode 100644 index 0000000..ef101fe --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/__init__.py @@ -0,0 +1 @@ +# Copyright (c) OpenMMLab. All rights reserved. diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_ann_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_ann_head.py new file mode 100644 index 0000000..c1e44bc --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_ann_head.py @@ -0,0 +1,20 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch + +from mmseg.models.decode_heads import ANNHead +from .utils import to_cuda + + +def test_ann_head(): + + inputs = [torch.randn(1, 4, 45, 45), torch.randn(1, 8, 21, 21)] + head = ANNHead( + in_channels=[4, 8], + channels=2, + num_classes=19, + in_index=[-2, -1], + project_channels=8) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 21, 21) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_apc_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_apc_head.py new file mode 100644 index 0000000..dc55ccc --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_apc_head.py @@ -0,0 +1,59 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
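+# APCHead is exercised with fusion=True and fusion=False; per the asserts +# below, each entry in pool_scales yields one ACM whose pool_scale attribute +# mirrors the configured value.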
+import pytest +import torch + +from mmseg.models.decode_heads import APCHead +from .utils import _conv_has_norm, to_cuda + + +def test_apc_head(): + + with pytest.raises(AssertionError): + # pool_scales must be list|tuple + APCHead(in_channels=8, channels=2, num_classes=19, pool_scales=1) + + # test no norm_cfg + head = APCHead(in_channels=8, channels=2, num_classes=19) + assert not _conv_has_norm(head, sync_bn=False) + + # test with norm_cfg + head = APCHead( + in_channels=8, + channels=2, + num_classes=19, + norm_cfg=dict(type='SyncBN')) + assert _conv_has_norm(head, sync_bn=True) + + # fusion=True + inputs = [torch.randn(1, 8, 45, 45)] + head = APCHead( + in_channels=8, + channels=2, + num_classes=19, + pool_scales=(1, 2, 3), + fusion=True) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + assert head.fusion is True + assert head.acm_modules[0].pool_scale == 1 + assert head.acm_modules[1].pool_scale == 2 + assert head.acm_modules[2].pool_scale == 3 + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 45, 45) + + # fusion=False + inputs = [torch.randn(1, 8, 45, 45)] + head = APCHead( + in_channels=8, + channels=2, + num_classes=19, + pool_scales=(1, 2, 3), + fusion=False) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + assert head.fusion is False + assert head.acm_modules[0].pool_scale == 1 + assert head.acm_modules[1].pool_scale == 2 + assert head.acm_modules[2].pool_scale == 3 + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 45, 45) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_aspp_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_aspp_head.py new file mode 100644 index 0000000..db9e893 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_aspp_head.py @@ -0,0 +1,76 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pytest +import torch + +from mmseg.models.decode_heads import ASPPHead, DepthwiseSeparableASPPHead +from .utils import _conv_has_norm, to_cuda + + +def test_aspp_head(): + + with pytest.raises(AssertionError): + # pool_scales must be list|tuple + ASPPHead(in_channels=8, channels=4, num_classes=19, dilations=1) + + # test no norm_cfg + head = ASPPHead(in_channels=8, channels=4, num_classes=19) + assert not _conv_has_norm(head, sync_bn=False) + + # test with norm_cfg + head = ASPPHead( + in_channels=8, + channels=4, + num_classes=19, + norm_cfg=dict(type='SyncBN')) + assert _conv_has_norm(head, sync_bn=True) + + inputs = [torch.randn(1, 8, 45, 45)] + head = ASPPHead( + in_channels=8, channels=4, num_classes=19, dilations=(1, 12, 24)) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + assert head.aspp_modules[0].conv.dilation == (1, 1) + assert head.aspp_modules[1].conv.dilation == (12, 12) + assert head.aspp_modules[2].conv.dilation == (24, 24) + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 45, 45) + + +def test_dw_aspp_head(): + + # test w.o. 
c1 + inputs = [torch.randn(1, 8, 45, 45)] + head = DepthwiseSeparableASPPHead( + c1_in_channels=0, + c1_channels=0, + in_channels=8, + channels=4, + num_classes=19, + dilations=(1, 12, 24)) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + assert head.c1_bottleneck is None + assert head.aspp_modules[0].conv.dilation == (1, 1) + assert head.aspp_modules[1].depthwise_conv.dilation == (12, 12) + assert head.aspp_modules[2].depthwise_conv.dilation == (24, 24) + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 45, 45) + + # test with c1 + inputs = [torch.randn(1, 4, 45, 45), torch.randn(1, 16, 21, 21)] + head = DepthwiseSeparableASPPHead( + c1_in_channels=4, + c1_channels=2, + in_channels=16, + channels=8, + num_classes=19, + dilations=(1, 12, 24)) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + assert head.c1_bottleneck.in_channels == 4 + assert head.c1_bottleneck.out_channels == 2 + assert head.aspp_modules[0].conv.dilation == (1, 1) + assert head.aspp_modules[1].depthwise_conv.dilation == (12, 12) + assert head.aspp_modules[2].depthwise_conv.dilation == (24, 24) + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 45, 45) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_cc_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_cc_head.py new file mode 100644 index 0000000..0630417 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_cc_head.py @@ -0,0 +1,18 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pytest +import torch + +from mmseg.models.decode_heads import CCHead +from .utils import to_cuda + + +def test_cc_head(): + head = CCHead(in_channels=16, channels=8, num_classes=19) + assert len(head.convs) == 2 + assert hasattr(head, 'cca') + if not torch.cuda.is_available(): + pytest.skip('CCHead requires CUDA') + inputs = [torch.randn(1, 16, 23, 23)] + head, inputs = to_cuda(head, inputs) + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 23, 23) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_da_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_da_head.py new file mode 100644 index 0000000..7ab4a96 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_da_head.py @@ -0,0 +1,19 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch + +from mmseg.models.decode_heads import DAHead +from .utils import to_cuda + + +def test_da_head(): + + inputs = [torch.randn(1, 16, 23, 23)] + head = DAHead(in_channels=16, channels=8, num_classes=19, pam_channels=8) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + outputs = head(inputs) + assert isinstance(outputs, tuple) and len(outputs) == 3 + for output in outputs: + assert output.shape == (1, head.num_classes, 23, 23) + test_output = head.forward_test(inputs, None, None) + assert test_output.shape == (1, head.num_classes, 23, 23) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_decode_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_decode_head.py new file mode 100644 index 0000000..cb9ab97 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_decode_head.py @@ -0,0 +1,165 @@ +# Copyright (c) OpenMMLab. 
All rights reserved. +from unittest.mock import patch + +import pytest +import torch + +from mmseg.models.decode_heads.decode_head import BaseDecodeHead +from .utils import to_cuda + + +@patch.multiple(BaseDecodeHead, __abstractmethods__=set()) +def test_decode_head(): + + with pytest.raises(AssertionError): + # default input_transform doesn't accept multiple inputs + BaseDecodeHead([32, 16], 16, num_classes=19) + + with pytest.raises(AssertionError): + # default input_transform doesn't accept multiple inputs + BaseDecodeHead(32, 16, num_classes=19, in_index=[-1, -2]) + + with pytest.raises(AssertionError): + # supported mode is resize_concat only + BaseDecodeHead(32, 16, num_classes=19, input_transform='concat') + + with pytest.raises(AssertionError): + # in_channels should be list|tuple + BaseDecodeHead(32, 16, num_classes=19, input_transform='resize_concat') + + with pytest.raises(AssertionError): + # in_index should be list|tuple + BaseDecodeHead([32], + 16, + in_index=-1, + num_classes=19, + input_transform='resize_concat') + + with pytest.raises(AssertionError): + # len(in_index) should equal len(in_channels) + BaseDecodeHead([32, 16], + 16, + num_classes=19, + in_index=[-1], + input_transform='resize_concat') + + # test default dropout + head = BaseDecodeHead(32, 16, num_classes=19) + assert hasattr(head, 'dropout') and head.dropout.p == 0.1 + + # test set dropout + head = BaseDecodeHead(32, 16, num_classes=19, dropout_ratio=0.2) + assert hasattr(head, 'dropout') and head.dropout.p == 0.2 + + # test no input_transform + inputs = [torch.randn(1, 32, 45, 45)] + head = BaseDecodeHead(32, 16, num_classes=19) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + assert head.in_channels == 32 + assert head.input_transform is None + transformed_inputs = head._transform_inputs(inputs) + assert transformed_inputs.shape == (1, 32, 45, 45) + + # test input_transform = resize_concat + inputs = [torch.randn(1, 32, 45, 45), torch.randn(1, 16, 21, 21)] + head = BaseDecodeHead([32, 16], + 16, + num_classes=19, + in_index=[0, 1], + input_transform='resize_concat') + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + assert head.in_channels == 48 + assert head.input_transform == 'resize_concat' + transformed_inputs = head._transform_inputs(inputs) + assert transformed_inputs.shape == (1, 48, 45, 45) + + # test multi-loss, loss_decode is dict + with pytest.raises(TypeError): + # loss_decode must be a dict or sequence of dict. 
+ BaseDecodeHead(3, 16, num_classes=19, loss_decode=['CrossEntropyLoss']) + + inputs = torch.randn(2, 19, 8, 8).float() + target = torch.ones(2, 1, 64, 64).long() + head = BaseDecodeHead( + 3, + 16, + num_classes=19, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + head, target = to_cuda(head, target) + loss = head.losses(seg_logit=inputs, seg_label=target) + assert 'loss_ce' in loss + + # test multi-loss, loss_decode is list of dict + inputs = torch.randn(2, 19, 8, 8).float() + target = torch.ones(2, 1, 64, 64).long() + head = BaseDecodeHead( + 3, + 16, + num_classes=19, + loss_decode=[ + dict(type='CrossEntropyLoss', loss_name='loss_1'), + dict(type='CrossEntropyLoss', loss_name='loss_2') + ]) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + head, target = to_cuda(head, target) + loss = head.losses(seg_logit=inputs, seg_label=target) + assert 'loss_1' in loss + assert 'loss_2' in loss + + # 'loss_decode' must be a dict or sequence of dict + with pytest.raises(TypeError): + BaseDecodeHead(3, 16, num_classes=19, loss_decode=['CrossEntropyLoss']) + with pytest.raises(TypeError): + BaseDecodeHead(3, 16, num_classes=19, loss_decode=0) + + # test multi-loss, loss_decode is list of dict + inputs = torch.randn(2, 19, 8, 8).float() + target = torch.ones(2, 1, 64, 64).long() + head = BaseDecodeHead( + 3, + 16, + num_classes=19, + loss_decode=(dict(type='CrossEntropyLoss', loss_name='loss_1'), + dict(type='CrossEntropyLoss', loss_name='loss_2'), + dict(type='CrossEntropyLoss', loss_name='loss_3'))) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + head, target = to_cuda(head, target) + loss = head.losses(seg_logit=inputs, seg_label=target) + assert 'loss_1' in loss + assert 'loss_2' in loss + assert 'loss_3' in loss + + # test multi-loss, loss_decode is list of dict, names of them are identical + inputs = torch.randn(2, 19, 8, 8).float() + target = torch.ones(2, 1, 64, 64).long() + head = BaseDecodeHead( + 3, + 16, + num_classes=19, + loss_decode=(dict(type='CrossEntropyLoss', loss_name='loss_ce'), + dict(type='CrossEntropyLoss', loss_name='loss_ce'), + dict(type='CrossEntropyLoss', loss_name='loss_ce'))) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + head, target = to_cuda(head, target) + loss_3 = head.losses(seg_logit=inputs, seg_label=target) + + head = BaseDecodeHead( + 3, + 16, + num_classes=19, + loss_decode=(dict(type='CrossEntropyLoss', loss_name='loss_ce'))) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + head, target = to_cuda(head, target) + loss = head.losses(seg_logit=inputs, seg_label=target) + assert 'loss_ce' in loss + assert 'loss_ce' in loss_3 + assert loss_3['loss_ce'] == 3 * loss['loss_ce'] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_dm_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_dm_head.py new file mode 100644 index 0000000..a922ff7 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_dm_head.py @@ -0,0 +1,59 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
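+# This mirrors the APCHead test above: DMHead builds one DCM per entry in +# filter_sizes, and the asserts below check each module's filter_size.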
+import pytest +import torch + +from mmseg.models.decode_heads import DMHead +from .utils import _conv_has_norm, to_cuda + + +def test_dm_head(): + + with pytest.raises(AssertionError): + # filter_sizes must be list|tuple + DMHead(in_channels=8, channels=4, num_classes=19, filter_sizes=1) + + # test no norm_cfg + head = DMHead(in_channels=8, channels=4, num_classes=19) + assert not _conv_has_norm(head, sync_bn=False) + + # test with norm_cfg + head = DMHead( + in_channels=8, + channels=4, + num_classes=19, + norm_cfg=dict(type='SyncBN')) + assert _conv_has_norm(head, sync_bn=True) + + # fusion=True + inputs = [torch.randn(1, 8, 23, 23)] + head = DMHead( + in_channels=8, + channels=4, + num_classes=19, + filter_sizes=(1, 3, 5), + fusion=True) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + assert head.fusion is True + assert head.dcm_modules[0].filter_size == 1 + assert head.dcm_modules[1].filter_size == 3 + assert head.dcm_modules[2].filter_size == 5 + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 23, 23) + + # fusion=False + inputs = [torch.randn(1, 8, 23, 23)] + head = DMHead( + in_channels=8, + channels=4, + num_classes=19, + filter_sizes=(1, 3, 5), + fusion=False) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + assert head.fusion is False + assert head.dcm_modules[0].filter_size == 1 + assert head.dcm_modules[1].filter_size == 3 + assert head.dcm_modules[2].filter_size == 5 + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 23, 23) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_dnl_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_dnl_head.py new file mode 100644 index 0000000..720cb07 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_dnl_head.py @@ -0,0 +1,44 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
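+# DNLHead is run in all four non-local modes ('embedded_gaussian', +# 'dot_product', 'gaussian' and 'concatenation'); in every mode the output +# should keep the input spatial size.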
+import torch + +from mmseg.models.decode_heads import DNLHead +from .utils import to_cuda + + +def test_dnl_head(): + # DNL with 'embedded_gaussian' mode + head = DNLHead(in_channels=8, channels=4, num_classes=19) + assert len(head.convs) == 2 + assert hasattr(head, 'dnl_block') + assert head.dnl_block.temperature == 0.05 + inputs = [torch.randn(1, 8, 23, 23)] + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 23, 23) + + # NonLocal2d with 'dot_product' mode + head = DNLHead( + in_channels=8, channels=4, num_classes=19, mode='dot_product') + inputs = [torch.randn(1, 8, 23, 23)] + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 23, 23) + + # NonLocal2d with 'gaussian' mode + head = DNLHead(in_channels=8, channels=4, num_classes=19, mode='gaussian') + inputs = [torch.randn(1, 8, 23, 23)] + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 23, 23) + + # NonLocal2d with 'concatenation' mode + head = DNLHead( + in_channels=8, channels=4, num_classes=19, mode='concatenation') + inputs = [torch.randn(1, 8, 23, 23)] + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 23, 23) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_dpt_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_dpt_head.py new file mode 100644 index 0000000..0a6af61 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_dpt_head.py @@ -0,0 +1,49 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pytest +import torch + +from mmseg.models.decode_heads import DPTHead + + +def test_dpt_head(): + + with pytest.raises(AssertionError): + # input_transform must be 'multiple_select' + head = DPTHead( + in_channels=[768, 768, 768, 768], + channels=4, + num_classes=19, + in_index=[0, 1, 2, 3]) + + head = DPTHead( + in_channels=[768, 768, 768, 768], + channels=4, + num_classes=19, + in_index=[0, 1, 2, 3], + input_transform='multiple_select') + + inputs = [[torch.randn(4, 768, 2, 2), + torch.randn(4, 768)] for _ in range(4)] + output = head(inputs) + assert output.shape == torch.Size((4, 19, 16, 16)) + + # test readout operation + head = DPTHead( + in_channels=[768, 768, 768, 768], + channels=4, + num_classes=19, + in_index=[0, 1, 2, 3], + input_transform='multiple_select', + readout_type='add') + output = head(inputs) + assert output.shape == torch.Size((4, 19, 16, 16)) + + head = DPTHead( + in_channels=[768, 768, 768, 768], + channels=4, + num_classes=19, + in_index=[0, 1, 2, 3], + input_transform='multiple_select', + readout_type='project') + output = head(inputs) + assert output.shape == torch.Size((4, 19, 16, 16)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_ema_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_ema_head.py new file mode 100644 index 0000000..1811cd2 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_ema_head.py @@ -0,0 +1,23 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
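+# Per the loop below, the parameters of ema_mid_conv are expected to be +# frozen (requires_grad is False) right after construction.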
+import torch + +from mmseg.models.decode_heads import EMAHead +from .utils import to_cuda + + +def test_emanet_head(): + head = EMAHead( + in_channels=4, + ema_channels=3, + channels=2, + num_stages=3, + num_bases=2, + num_classes=19) + for param in head.ema_mid_conv.parameters(): + assert not param.requires_grad + assert hasattr(head, 'ema_module') + inputs = [torch.randn(1, 4, 23, 23)] + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 23, 23) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_enc_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_enc_head.py new file mode 100644 index 0000000..9c84c75 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_enc_head.py @@ -0,0 +1,47 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch + +from mmseg.models.decode_heads import EncHead +from .utils import to_cuda + + +def test_enc_head(): + # with se_loss, w.o. lateral + inputs = [torch.randn(1, 8, 21, 21)] + head = EncHead(in_channels=[8], channels=4, num_classes=19, in_index=[-1]) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + outputs = head(inputs) + assert isinstance(outputs, tuple) and len(outputs) == 2 + assert outputs[0].shape == (1, head.num_classes, 21, 21) + assert outputs[1].shape == (1, head.num_classes) + + # w.o se_loss, w.o. lateral + inputs = [torch.randn(1, 8, 21, 21)] + head = EncHead( + in_channels=[8], + channels=4, + use_se_loss=False, + num_classes=19, + in_index=[-1]) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 21, 21) + + # with se_loss, with lateral + inputs = [torch.randn(1, 4, 45, 45), torch.randn(1, 8, 21, 21)] + head = EncHead( + in_channels=[4, 8], + channels=4, + add_lateral=True, + num_classes=19, + in_index=[-2, -1]) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + outputs = head(inputs) + assert isinstance(outputs, tuple) and len(outputs) == 2 + assert outputs[0].shape == (1, head.num_classes, 21, 21) + assert outputs[1].shape == (1, head.num_classes) + test_output = head.forward_test(inputs, None, None) + assert test_output.shape == (1, head.num_classes, 21, 21) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_fcn_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_fcn_head.py new file mode 100644 index 0000000..4e633fb --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_fcn_head.py @@ -0,0 +1,131 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
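+# With concat_input=True, FCNHead concatenates its input with the conv +# output before conv_cat, so conv_cat.in_channels should equal +# in_channels + channels (8 + 4 = 12 in the case below).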
+import pytest +import torch +from mmcv.cnn import ConvModule, DepthwiseSeparableConvModule +from mmcv.utils.parrots_wrapper import SyncBatchNorm + +from mmseg.models.decode_heads import DepthwiseSeparableFCNHead, FCNHead +from .utils import to_cuda + + +def test_fcn_head(): + + with pytest.raises(AssertionError): + # num_convs must not be less than 0 + FCNHead(num_classes=19, num_convs=-1) + + # test no norm_cfg + head = FCNHead(in_channels=8, channels=4, num_classes=19) + for m in head.modules(): + if isinstance(m, ConvModule): + assert not m.with_norm + + # test with norm_cfg + head = FCNHead( + in_channels=8, + channels=4, + num_classes=19, + norm_cfg=dict(type='SyncBN')) + for m in head.modules(): + if isinstance(m, ConvModule): + assert m.with_norm and isinstance(m.bn, SyncBatchNorm) + + # test concat_input=False + inputs = [torch.randn(1, 8, 23, 23)] + head = FCNHead( + in_channels=8, channels=4, num_classes=19, concat_input=False) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + assert len(head.convs) == 2 + assert not head.concat_input and not hasattr(head, 'conv_cat') + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 23, 23) + + # test concat_input=True + inputs = [torch.randn(1, 8, 23, 23)] + head = FCNHead( + in_channels=8, channels=4, num_classes=19, concat_input=True) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + assert len(head.convs) == 2 + assert head.concat_input + assert head.conv_cat.in_channels == 12 + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 23, 23) + + # test kernel_size=3 + inputs = [torch.randn(1, 8, 23, 23)] + head = FCNHead(in_channels=8, channels=4, num_classes=19) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + for i in range(len(head.convs)): + assert head.convs[i].kernel_size == (3, 3) + assert head.convs[i].padding == 1 + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 23, 23) + + # test kernel_size=1 + inputs = [torch.randn(1, 8, 23, 23)] + head = FCNHead(in_channels=8, channels=4, num_classes=19, kernel_size=1) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + for i in range(len(head.convs)): + assert head.convs[i].kernel_size == (1, 1) + assert head.convs[i].padding == 0 + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 23, 23) + + # test num_convs=1 + inputs = [torch.randn(1, 8, 23, 23)] + head = FCNHead(in_channels=8, channels=4, num_classes=19, num_convs=1) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + assert len(head.convs) == 1 + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 23, 23) + + # test num_convs=0 + inputs = [torch.randn(1, 8, 23, 23)] + head = FCNHead( + in_channels=8, + channels=8, + num_classes=19, + num_convs=0, + concat_input=False) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + assert isinstance(head.convs, torch.nn.Identity) + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 23, 23) + + +def test_sep_fcn_head(): + # test sep_fcn_head with concat_input=False + head = DepthwiseSeparableFCNHead( + in_channels=128, + channels=128, + concat_input=False, + num_classes=19, + in_index=-1, + norm_cfg=dict(type='BN', requires_grad=True, momentum=0.01)) + x = [torch.rand(2, 128, 8, 8)] + output = head(x) + assert output.shape == (2, head.num_classes, 8, 8) + assert not head.concat_input + assert isinstance(head.convs[0], DepthwiseSeparableConvModule) +
assert isinstance(head.convs[1], DepthwiseSeparableConvModule) + assert head.conv_seg.kernel_size == (1, 1) + + head = DepthwiseSeparableFCNHead( + in_channels=64, + channels=64, + concat_input=True, + num_classes=19, + in_index=-1, + norm_cfg=dict(type='BN', requires_grad=True, momentum=0.01)) + x = [torch.rand(3, 64, 8, 8)] + output = head(x) + assert output.shape == (3, head.num_classes, 8, 8) + assert head.concat_input + assert isinstance(head.convs[0], DepthwiseSeparableConvModule) + assert isinstance(head.convs[1], DepthwiseSeparableConvModule) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_gc_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_gc_head.py new file mode 100644 index 0000000..c62ac9a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_gc_head.py @@ -0,0 +1,16 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch + +from mmseg.models.decode_heads import GCHead +from .utils import to_cuda + + +def test_gc_head(): + head = GCHead(in_channels=4, channels=4, num_classes=19) + assert len(head.convs) == 2 + assert hasattr(head, 'gc_block') + inputs = [torch.randn(1, 4, 23, 23)] + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 23, 23) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_isa_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_isa_head.py new file mode 100644 index 0000000..b177f6d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_isa_head.py @@ -0,0 +1,20 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch + +from mmseg.models.decode_heads import ISAHead +from .utils import to_cuda + + +def test_isa_head(): + + inputs = [torch.randn(1, 8, 23, 23)] + isa_head = ISAHead( + in_channels=8, + channels=4, + num_classes=19, + isa_channels=4, + down_factor=(8, 8)) + if torch.cuda.is_available(): + isa_head, inputs = to_cuda(isa_head, inputs) + output = isa_head(inputs) + assert output.shape == (1, isa_head.num_classes, 23, 23) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_knet_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_knet_head.py new file mode 100644 index 0000000..e6845a6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_knet_head.py @@ -0,0 +1,195 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
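+# num_stages, conv_kernel_size and kernel_updator_cfg below are module-level +# constants shared by every KernelUpdateHead stage in the IterativeDecodeHead +# configs that follow.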
+import torch + +from mmseg.models.decode_heads.knet_head import (IterativeDecodeHead, + KernelUpdateHead) +from .utils import to_cuda + +num_stages = 3 +conv_kernel_size = 1 + +kernel_updator_cfg = dict( + type='KernelUpdator', + in_channels=16, + feat_channels=16, + out_channels=16, + gate_norm_act=True, + activate_out=True, + act_cfg=dict(type='ReLU', inplace=True), + norm_cfg=dict(type='LN')) + + +def test_knet_head(): + # test init function of kernel update head + kernel_update_head = KernelUpdateHead( + num_classes=150, + num_ffn_fcs=2, + num_heads=8, + num_mask_fcs=1, + feedforward_channels=128, + in_channels=32, + out_channels=32, + dropout=0.0, + conv_kernel_size=conv_kernel_size, + ffn_act_cfg=dict(type='ReLU', inplace=True), + with_ffn=True, + feat_transform_cfg=dict(conv_cfg=dict(type='Conv2d'), act_cfg=None), + kernel_init=True, + kernel_updator_cfg=kernel_updator_cfg) + kernel_update_head.init_weights() + + head = IterativeDecodeHead( + num_stages=num_stages, + kernel_update_head=[ + dict( + type='KernelUpdateHead', + num_classes=150, + num_ffn_fcs=2, + num_heads=8, + num_mask_fcs=1, + feedforward_channels=128, + in_channels=32, + out_channels=32, + dropout=0.0, + conv_kernel_size=conv_kernel_size, + ffn_act_cfg=dict(type='ReLU', inplace=True), + with_ffn=True, + feat_transform_cfg=dict( + conv_cfg=dict(type='Conv2d'), act_cfg=None), + kernel_init=False, + kernel_updator_cfg=kernel_updator_cfg) + for _ in range(num_stages) + ], + kernel_generate_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + channels=32, + num_convs=2, + concat_input=True, + dropout_ratio=0.1, + num_classes=150, + align_corners=False)) + head.init_weights() + inputs = [ + torch.randn(1, 16, 27, 32), + torch.randn(1, 32, 27, 16), + torch.randn(1, 64, 27, 16), + torch.randn(1, 128, 27, 16) + ] + + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + outputs = head(inputs) + assert outputs[-1].shape == (1, head.num_classes, 27, 16) + + # test whether only return the prediction of + # the last stage during testing + with torch.no_grad(): + head.eval() + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 27, 16) + + # test K-Net without `feat_transform_cfg` + head = IterativeDecodeHead( + num_stages=num_stages, + kernel_update_head=[ + dict( + type='KernelUpdateHead', + num_classes=150, + num_ffn_fcs=2, + num_heads=8, + num_mask_fcs=1, + feedforward_channels=128, + in_channels=32, + out_channels=32, + dropout=0.0, + conv_kernel_size=conv_kernel_size, + ffn_act_cfg=dict(type='ReLU', inplace=True), + with_ffn=True, + feat_transform_cfg=None, + kernel_updator_cfg=kernel_updator_cfg) + for _ in range(num_stages) + ], + kernel_generate_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + channels=32, + num_convs=2, + concat_input=True, + dropout_ratio=0.1, + num_classes=150, + align_corners=False)) + head.init_weights() + + inputs = [ + torch.randn(1, 16, 27, 32), + torch.randn(1, 32, 27, 16), + torch.randn(1, 64, 27, 16), + torch.randn(1, 128, 27, 16) + ] + + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + outputs = head(inputs) + assert outputs[-1].shape == (1, head.num_classes, 27, 16) + + # test K-Net with + # self.mask_transform_stride == 2 and self.feat_gather_stride == 1 + head = IterativeDecodeHead( + num_stages=num_stages, + kernel_update_head=[ + dict( + type='KernelUpdateHead', + num_classes=150, + num_ffn_fcs=2, + num_heads=8, + num_mask_fcs=1, + feedforward_channels=128, + in_channels=32, + out_channels=32, + 
dropout=0.0, + conv_kernel_size=conv_kernel_size, + ffn_act_cfg=dict(type='ReLU', inplace=True), + with_ffn=True, + feat_transform_cfg=dict( + conv_cfg=dict(type='Conv2d'), act_cfg=None), + kernel_init=False, + mask_transform_stride=2, + feat_gather_stride=1, + kernel_updator_cfg=kernel_updator_cfg) + for _ in range(num_stages) + ], + kernel_generate_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + channels=32, + num_convs=2, + concat_input=True, + dropout_ratio=0.1, + num_classes=150, + align_corners=False)) + head.init_weights() + + inputs = [ + torch.randn(1, 16, 27, 32), + torch.randn(1, 32, 27, 16), + torch.randn(1, 64, 27, 16), + torch.randn(1, 128, 27, 16) + ] + + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + outputs = head(inputs) + assert outputs[-1].shape == (1, head.num_classes, 26, 16) + + # test loss function in K-Net + fake_label = torch.ones_like( + outputs[-1][:, 0:1, :, :], dtype=torch.int16).long() + loss = head.losses(seg_logit=outputs, seg_label=fake_label) + assert loss['loss_ce.s0'] != torch.zeros_like(loss['loss_ce.s0']) + assert loss['loss_ce.s1'] != torch.zeros_like(loss['loss_ce.s1']) + assert loss['loss_ce.s2'] != torch.zeros_like(loss['loss_ce.s2']) + assert loss['loss_ce.s3'] != torch.zeros_like(loss['loss_ce.s3']) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_lraspp_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_lraspp_head.py new file mode 100644 index 0000000..a46e6a1 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_lraspp_head.py @@ -0,0 +1,68 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pytest +import torch + +from mmseg.models.decode_heads import LRASPPHead + + +def test_lraspp_head(): + with pytest.raises(ValueError): + # check invalid input_transform + LRASPPHead( + in_channels=(4, 4, 123), + in_index=(0, 1, 2), + channels=32, + input_transform='resize_concat', + dropout_ratio=0.1, + num_classes=19, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + + with pytest.raises(AssertionError): + # check invalid branch_channels + LRASPPHead( + in_channels=(4, 4, 123), + in_index=(0, 1, 2), + channels=32, + branch_channels=64, + input_transform='multiple_select', + dropout_ratio=0.1, + num_classes=19, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + + # test with default settings + lraspp_head = LRASPPHead( + in_channels=(4, 4, 123), + in_index=(0, 1, 2), + channels=32, + input_transform='multiple_select', + dropout_ratio=0.1, + num_classes=19, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU'), + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)) + inputs = [ + torch.randn(2, 4, 45, 45), + torch.randn(2, 4, 28, 28), + torch.randn(2, 123, 14, 14) + ] + with pytest.raises(RuntimeError): + # check invalid inputs + output = lraspp_head(inputs) + + inputs = [ + torch.randn(2, 4, 111, 111), + torch.randn(2, 4, 77, 77), + torch.randn(2, 123, 55, 55) + ] + output = lraspp_head(inputs) + assert output.shape == (2, 19, 111, 111) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_nl_head.py 
b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_nl_head.py new file mode 100644 index 0000000..d4ef0b9 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_nl_head.py @@ -0,0 +1,16 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch + +from mmseg.models.decode_heads import NLHead +from .utils import to_cuda + + +def test_nl_head(): + head = NLHead(in_channels=8, channels=4, num_classes=19) + assert len(head.convs) == 2 + assert hasattr(head, 'nl_block') + inputs = [torch.randn(1, 8, 23, 23)] + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 23, 23) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_ocr_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_ocr_head.py new file mode 100644 index 0000000..5e5d669 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_ocr_head.py @@ -0,0 +1,19 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch + +from mmseg.models.decode_heads import FCNHead, OCRHead +from .utils import to_cuda + + +def test_ocr_head(): + + inputs = [torch.randn(1, 8, 23, 23)] + ocr_head = OCRHead( + in_channels=8, channels=4, num_classes=19, ocr_channels=8) + fcn_head = FCNHead(in_channels=8, channels=4, num_classes=19) + if torch.cuda.is_available(): + head, inputs = to_cuda(ocr_head, inputs) + head, inputs = to_cuda(fcn_head, inputs) + prev_output = fcn_head(inputs) + output = ocr_head(inputs, prev_output) + assert output.shape == (1, ocr_head.num_classes, 23, 23) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_point_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_point_head.py new file mode 100644 index 0000000..142ab16 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_point_head.py @@ -0,0 +1,61 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
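+# Shape note for the asserts below: forward_test upsamples the coarse 45x45 +# prediction by `scale_factor` at each of `subdivision_steps` refinement +# rounds, so two rounds at scale 2 give 45 * 2**2 = 180 per side.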
+import torch +from mmcv.utils import ConfigDict + +from mmseg.models.decode_heads import FCNHead, PointHead +from .utils import to_cuda + + +def test_point_head(): + + inputs = [torch.randn(1, 32, 45, 45)] + point_head = PointHead( + in_channels=[32], in_index=[0], channels=16, num_classes=19) + assert len(point_head.fcs) == 3 + fcn_head = FCNHead(in_channels=32, channels=16, num_classes=19) + if torch.cuda.is_available(): + head, inputs = to_cuda(point_head, inputs) + head, inputs = to_cuda(fcn_head, inputs) + prev_output = fcn_head(inputs) + test_cfg = ConfigDict( + subdivision_steps=2, subdivision_num_points=8196, scale_factor=2) + output = point_head.forward_test(inputs, prev_output, None, test_cfg) + assert output.shape == (1, point_head.num_classes, 180, 180) + + # test multiple losses case + inputs = [torch.randn(1, 32, 45, 45)] + point_head_multiple_losses = PointHead( + in_channels=[32], + in_index=[0], + channels=16, + num_classes=19, + loss_decode=[ + dict(type='CrossEntropyLoss', loss_name='loss_1'), + dict(type='CrossEntropyLoss', loss_name='loss_2') + ]) + assert len(point_head_multiple_losses.fcs) == 3 + fcn_head_multiple_losses = FCNHead( + in_channels=32, + channels=16, + num_classes=19, + loss_decode=[ + dict(type='CrossEntropyLoss', loss_name='loss_1'), + dict(type='CrossEntropyLoss', loss_name='loss_2') + ]) + if torch.cuda.is_available(): + head, inputs = to_cuda(point_head_multiple_losses, inputs) + head, inputs = to_cuda(fcn_head_multiple_losses, inputs) + prev_output = fcn_head_multiple_losses(inputs) + test_cfg = ConfigDict( + subdivision_steps=2, subdivision_num_points=8196, scale_factor=2) + output = point_head_multiple_losses.forward_test(inputs, prev_output, None, + test_cfg) + assert output.shape == (1, point_head.num_classes, 180, 180) + + fake_label = torch.ones([1, 180, 180], dtype=torch.long) + + if torch.cuda.is_available(): + fake_label = fake_label.cuda() + loss = point_head_multiple_losses.losses(output, fake_label) + assert 'pointloss_1' in loss + assert 'pointloss_2' in loss diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_psa_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_psa_head.py new file mode 100644 index 0000000..34f592b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_psa_head.py @@ -0,0 +1,122 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
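+# Note: PSAHead builds its point-wise attention over a map of `mask_size`, so +# the 13x13 inputs below are sized to match mask_size=(13, 13).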
+import pytest +import torch + +from mmseg.models.decode_heads import PSAHead +from .utils import _conv_has_norm, to_cuda + + +def test_psa_head(): + + with pytest.raises(AssertionError): + # psa_type must be in 'bi-direction', 'collect', 'distribute' + PSAHead( + in_channels=4, + channels=2, + num_classes=19, + mask_size=(13, 13), + psa_type='gather') + + # test no norm_cfg + head = PSAHead( + in_channels=4, channels=2, num_classes=19, mask_size=(13, 13)) + assert not _conv_has_norm(head, sync_bn=False) + + # test with norm_cfg + head = PSAHead( + in_channels=4, + channels=2, + num_classes=19, + mask_size=(13, 13), + norm_cfg=dict(type='SyncBN')) + assert _conv_has_norm(head, sync_bn=True) + + # test 'bi-direction' psa_type + inputs = [torch.randn(1, 4, 13, 13)] + head = PSAHead( + in_channels=4, channels=2, num_classes=19, mask_size=(13, 13)) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 13, 13) + + # test 'bi-direction' psa_type, shrink_factor=1 + inputs = [torch.randn(1, 4, 13, 13)] + head = PSAHead( + in_channels=4, + channels=2, + num_classes=19, + mask_size=(13, 13), + shrink_factor=1) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 13, 13) + + # test 'bi-direction' psa_type with soft_max + inputs = [torch.randn(1, 4, 13, 13)] + head = PSAHead( + in_channels=4, + channels=2, + num_classes=19, + mask_size=(13, 13), + psa_softmax=True) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 13, 13) + + # test 'collect' psa_type + inputs = [torch.randn(1, 4, 13, 13)] + head = PSAHead( + in_channels=4, + channels=2, + num_classes=19, + mask_size=(13, 13), + psa_type='collect') + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 13, 13) + + # test 'collect' psa_type, shrink_factor=1 + inputs = [torch.randn(1, 4, 13, 13)] + head = PSAHead( + in_channels=4, + channels=2, + num_classes=19, + mask_size=(13, 13), + shrink_factor=1, + psa_type='collect') + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 13, 13) + + # test 'collect' psa_type, shrink_factor=1, compact=True + inputs = [torch.randn(1, 4, 13, 13)] + head = PSAHead( + in_channels=4, + channels=2, + num_classes=19, + mask_size=(13, 13), + psa_type='collect', + shrink_factor=1, + compact=True) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 13, 13) + + # test 'distribute' psa_type + inputs = [torch.randn(1, 4, 13, 13)] + head = PSAHead( + in_channels=4, + channels=2, + num_classes=19, + mask_size=(13, 13), + psa_type='distribute') + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 13, 13) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_psp_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_psp_head.py new file mode 100644 index 0000000..fde4087 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_psp_head.py @@ -0,0 +1,36 @@ +# Copyright (c) OpenMMLab. 
All rights reserved. +import pytest +import torch + +from mmseg.models.decode_heads import PSPHead +from .utils import _conv_has_norm, to_cuda + + +def test_psp_head(): + + with pytest.raises(AssertionError): + # pool_scales must be list|tuple + PSPHead(in_channels=4, channels=2, num_classes=19, pool_scales=1) + + # test no norm_cfg + head = PSPHead(in_channels=4, channels=2, num_classes=19) + assert not _conv_has_norm(head, sync_bn=False) + + # test with norm_cfg + head = PSPHead( + in_channels=4, + channels=2, + num_classes=19, + norm_cfg=dict(type='SyncBN')) + assert _conv_has_norm(head, sync_bn=True) + + inputs = [torch.randn(1, 4, 23, 23)] + head = PSPHead( + in_channels=4, channels=2, num_classes=19, pool_scales=(1, 2, 3)) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + assert head.psp_modules[0][0].output_size == 1 + assert head.psp_modules[1][0].output_size == 2 + assert head.psp_modules[2][0].output_size == 3 + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 23, 23) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_segformer_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_segformer_head.py new file mode 100644 index 0000000..73afaba --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_segformer_head.py @@ -0,0 +1,40 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pytest +import torch + +from mmseg.models.decode_heads import SegformerHead + + +def test_segformer_head(): + with pytest.raises(AssertionError): + # `in_channels` must have same length as `in_index` + SegformerHead( + in_channels=(1, 2, 3), in_index=(0, 1), channels=5, num_classes=2) + + H, W = (64, 64) + in_channels = (32, 64, 160, 256) + shapes = [(H // 2**(i + 2), W // 2**(i + 2)) + for i in range(len(in_channels))] + model = SegformerHead( + in_channels=in_channels, + in_index=[0, 1, 2, 3], + channels=256, + num_classes=19) + + with pytest.raises(IndexError): + # in_index must match the input feature maps. + inputs = [ + torch.randn((1, in_channel, *shape)) + for in_channel, shape in zip(in_channels, shapes) + ][:3] + temp = model(inputs) + + # Normal Input + # ((1, 32, 16, 16), (1, 64, 8, 8), (1, 160, 4, 4), (1, 256, 2, 2)) + inputs = [ + torch.randn((1, in_channel, *shape)) + for in_channel, shape in zip(in_channels, shapes) + ] + temp = model(inputs) + + assert temp.shape == (1, 19, H // 4, W // 4) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_segmenter_mask_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_segmenter_mask_head.py new file mode 100644 index 0000000..7b681ac --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_segmenter_mask_head.py @@ -0,0 +1,24 @@ +# Copyright (c) OpenMMLab. All rights reserved.
+import torch + +from mmseg.models.decode_heads import SegmenterMaskTransformerHead +from .utils import _conv_has_norm, to_cuda + + +def test_segmenter_mask_transformer_head(): + head = SegmenterMaskTransformerHead( + in_channels=2, + channels=2, + num_classes=150, + num_layers=2, + num_heads=3, + embed_dims=192, + dropout_ratio=0.0) + assert _conv_has_norm(head, sync_bn=True) + head.init_weights() + + inputs = [torch.randn(1, 2, 32, 32)] + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 32, 32) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_setr_mla_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_setr_mla_head.py new file mode 100644 index 0000000..301bc0b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_setr_mla_head.py @@ -0,0 +1,63 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pytest +import torch + +from mmseg.models.decode_heads import SETRMLAHead +from .utils import to_cuda + + +def test_setr_mla_head(capsys): + + with pytest.raises(AssertionError): + # MLA requires input multiple stage feature information. + SETRMLAHead(in_channels=8, channels=4, num_classes=19, in_index=1) + + with pytest.raises(AssertionError): + # multiple in_indexs requires multiple in_channels. + SETRMLAHead( + in_channels=8, channels=4, num_classes=19, in_index=(0, 1, 2, 3)) + + with pytest.raises(AssertionError): + # channels should be len(in_channels) * mla_channels + SETRMLAHead( + in_channels=(8, 8, 8, 8), + channels=8, + mla_channels=4, + in_index=(0, 1, 2, 3), + num_classes=19) + + # test inference of MLA head + img_size = (8, 8) + patch_size = 4 + head = SETRMLAHead( + in_channels=(8, 8, 8, 8), + channels=16, + mla_channels=4, + in_index=(0, 1, 2, 3), + num_classes=19, + norm_cfg=dict(type='BN')) + + h, w = img_size[0] // patch_size, img_size[1] // patch_size + # Input square NCHW format feature information + x = [ + torch.randn(1, 8, h, w), + torch.randn(1, 8, h, w), + torch.randn(1, 8, h, w), + torch.randn(1, 8, h, w) + ] + if torch.cuda.is_available(): + head, x = to_cuda(head, x) + out = head(x) + assert out.shape == (1, head.num_classes, h * 4, w * 4) + + # Input non-square NCHW format feature information + x = [ + torch.randn(1, 8, h, w * 2), + torch.randn(1, 8, h, w * 2), + torch.randn(1, 8, h, w * 2), + torch.randn(1, 8, h, w * 2) + ] + if torch.cuda.is_available(): + head, x = to_cuda(head, x) + out = head(x) + assert out.shape == (1, head.num_classes, h * 4, w * 8) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_setr_up_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_setr_up_head.py new file mode 100644 index 0000000..a051922 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_setr_up_head.py @@ -0,0 +1,56 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pytest +import torch + +from mmseg.models.decode_heads import SETRUPHead +from .utils import to_cuda + + +def test_setr_up_head(capsys): + + with pytest.raises(AssertionError): + # kernel_size must be [1/3] + SETRUPHead(num_classes=19, kernel_size=2) + + with pytest.raises(AssertionError): + # in_channels must be int type and in_channels must be same + # as embed_dim. 
+ SETRUPHead(in_channels=(4, 4), channels=2, num_classes=19) + + # test init_cfg of head + head = SETRUPHead( + in_channels=4, + channels=2, + norm_cfg=dict(type='SyncBN'), + num_classes=19, + init_cfg=dict(type='Kaiming')) + super(SETRUPHead, head).init_weights() + + # test inference of Naive head + # the auxiliary head of Naive head is same as Naive head + img_size = (4, 4) + patch_size = 2 + head = SETRUPHead( + in_channels=4, + channels=2, + num_classes=19, + num_convs=1, + up_scale=4, + kernel_size=1, + norm_cfg=dict(type='BN')) + + h, w = img_size[0] // patch_size, img_size[1] // patch_size + + # Input square NCHW format feature information + x = [torch.randn(1, 4, h, w)] + if torch.cuda.is_available(): + head, x = to_cuda(head, x) + out = head(x) + assert out.shape == (1, head.num_classes, h * 4, w * 4) + + # Input non-square NCHW format feature information + x = [torch.randn(1, 4, h, w * 2)] + if torch.cuda.is_available(): + head, x = to_cuda(head, x) + out = head(x) + assert out.shape == (1, head.num_classes, h * 4, w * 8) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_stdc_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_stdc_head.py new file mode 100644 index 0000000..1628209 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_stdc_head.py @@ -0,0 +1,31 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch + +from mmseg.models.decode_heads import STDCHead +from .utils import to_cuda + + +def test_stdc_head(): + inputs = [torch.randn(1, 32, 21, 21)] + head = STDCHead( + in_channels=32, + channels=8, + num_convs=1, + num_classes=2, + in_index=-1, + loss_decode=[ + dict( + type='CrossEntropyLoss', loss_name='loss_ce', loss_weight=1.0), + dict(type='DiceLoss', loss_name='loss_dice', loss_weight=1.0) + ]) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + outputs = head(inputs) + assert isinstance(outputs, torch.Tensor) and len(outputs) == 1 + assert outputs.shape == torch.Size([1, head.num_classes, 21, 21]) + + fake_label = torch.ones_like( + outputs[:, 0:1, :, :], dtype=torch.int16).long() + loss = head.losses(seg_logit=outputs, seg_label=fake_label) + assert loss['loss_ce'] != torch.zeros_like(loss['loss_ce']) + assert loss['loss_dice'] != torch.zeros_like(loss['loss_dice']) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_uper_head.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_uper_head.py new file mode 100644 index 0000000..09456a8 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/test_uper_head.py @@ -0,0 +1,35 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
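+# Note: UPerHead fuses the selected levels through a PSP + FPN path and emits +# its prediction at the finest input resolution, hence the (45, 45) output +# asserted below.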
+import pytest +import torch + +from mmseg.models.decode_heads import UPerHead +from .utils import _conv_has_norm, to_cuda + + +def test_uper_head(): + + with pytest.raises(AssertionError): + # fpn_in_channels must be list|tuple + UPerHead(in_channels=4, channels=2, num_classes=19) + + # test no norm_cfg + head = UPerHead( + in_channels=[4, 2], channels=2, num_classes=19, in_index=[-2, -1]) + assert not _conv_has_norm(head, sync_bn=False) + + # test with norm_cfg + head = UPerHead( + in_channels=[4, 2], + channels=2, + num_classes=19, + norm_cfg=dict(type='SyncBN'), + in_index=[-2, -1]) + assert _conv_has_norm(head, sync_bn=True) + + inputs = [torch.randn(1, 4, 45, 45), torch.randn(1, 2, 21, 21)] + head = UPerHead( + in_channels=[4, 2], channels=2, num_classes=19, in_index=[-2, -1]) + if torch.cuda.is_available(): + head, inputs = to_cuda(head, inputs) + outputs = head(inputs) + assert outputs.shape == (1, head.num_classes, 45, 45) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/utils.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/utils.py new file mode 100644 index 0000000..675241c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_heads/utils.py @@ -0,0 +1,22 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from mmcv.cnn import ConvModule +from mmcv.utils.parrots_wrapper import SyncBatchNorm + + +def _conv_has_norm(module, sync_bn): + for m in module.modules(): + if isinstance(m, ConvModule): + if not m.with_norm: + return False + if sync_bn: + if not isinstance(m.bn, SyncBatchNorm): + return False + return True + + +def to_cuda(module, data): + module = module.cuda() + if isinstance(data, list): + for i in range(len(data)): + data[i] = data[i].cuda() + return module, data diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_losses/__init__.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_losses/__init__.py new file mode 100644 index 0000000..ef101fe --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_losses/__init__.py @@ -0,0 +1 @@ +# Copyright (c) OpenMMLab. All rights reserved. diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_losses/test_ce_loss.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_losses/test_ce_loss.py new file mode 100644 index 0000000..afa5706 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_losses/test_ce_loss.py @@ -0,0 +1,294 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
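+# Expected values for the simple case below (pred=[[100, -100]], label=1): +# plain CE is -log_softmax(pred)[1] ~= 200, while the sigmoid branch expands +# the label to one-hot [0, 1] and BCE-with-logits averages the two per-channel +# losses (~100 each) to ~= 100.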
+import pytest +import torch + +from mmseg.models.losses.cross_entropy_loss import _expand_onehot_labels + + +@pytest.mark.parametrize('use_sigmoid', [True, False]) +@pytest.mark.parametrize('reduction', ('mean', 'sum', 'none')) +@pytest.mark.parametrize('avg_non_ignore', [True, False]) +@pytest.mark.parametrize('bce_input_same_dim', [True, False]) +def test_ce_loss(use_sigmoid, reduction, avg_non_ignore, bce_input_same_dim): + from mmseg.models import build_loss + + # use_mask and use_sigmoid cannot be true at the same time + with pytest.raises(AssertionError): + loss_cfg = dict( + type='CrossEntropyLoss', + use_mask=True, + use_sigmoid=True, + loss_weight=1.0) + build_loss(loss_cfg) + + # test loss with simple case for ce/bce + fake_pred = torch.Tensor([[100, -100]]) + fake_label = torch.Tensor([1]).long() + loss_cls_cfg = dict( + type='CrossEntropyLoss', + use_sigmoid=use_sigmoid, + loss_weight=1.0, + avg_non_ignore=avg_non_ignore, + loss_name='loss_ce') + loss_cls = build_loss(loss_cls_cfg) + if use_sigmoid: + assert torch.allclose( + loss_cls(fake_pred, fake_label), torch.tensor(100.)) + else: + assert torch.allclose( + loss_cls(fake_pred, fake_label), torch.tensor(200.)) + + # test loss with complicated case for ce/bce + # when avg_non_ignore is False, `avg_factor` would not be calculated + fake_pred = torch.full(size=(2, 21, 8, 8), fill_value=0.5) + fake_label = torch.ones(2, 8, 8).long() + fake_label[:, 0, 0] = 255 + fake_weight = None + # extra test bce loss when pred.shape == label.shape + if use_sigmoid and bce_input_same_dim: + fake_pred = torch.randn(2, 10).float() + fake_label = torch.rand(2, 10).float() + fake_weight = torch.rand(2, 10) # set weight in forward function + fake_label[0, [1, 2, 5, 7]] = 255 # set ignore_index + fake_label[1, [0, 5, 8, 9]] = 255 + loss_cls = build_loss(loss_cls_cfg) + loss = loss_cls( + fake_pred, fake_label, weight=fake_weight, ignore_index=255) + if use_sigmoid: + if fake_pred.dim() != fake_label.dim(): + fake_label, weight, valid_mask = _expand_onehot_labels( + labels=fake_label, + label_weights=None, + target_shape=fake_pred.shape, + ignore_index=255) + else: + # should mask out the ignored elements + valid_mask = ((fake_label >= 0) & (fake_label != 255)).float() + weight = valid_mask + torch_loss = torch.nn.functional.binary_cross_entropy_with_logits( + fake_pred, + fake_label.float(), + reduction='none', + weight=fake_weight) + if avg_non_ignore: + avg_factor = valid_mask.sum().item() + torch_loss = (torch_loss * weight).sum() / avg_factor + else: + torch_loss = (torch_loss * weight).mean() + else: + if avg_non_ignore: + torch_loss = torch.nn.functional.cross_entropy( + fake_pred, fake_label, reduction='mean', ignore_index=255) + else: + torch_loss = torch.nn.functional.cross_entropy( + fake_pred, fake_label, reduction='sum', + ignore_index=255) / fake_label.numel() + assert torch.allclose(loss, torch_loss) + + if use_sigmoid: + # test loss with complicated case for ce/bce + # when avg_non_ignore is False, `avg_factor` would not be calculated + fake_pred = torch.full(size=(2, 21, 8, 8), fill_value=0.5) + fake_label = torch.ones(2, 8, 8).long() + fake_label[:, 0, 0] = 255 + fake_weight = torch.rand(2, 8, 8) + + loss_cls = build_loss(loss_cls_cfg) + loss = loss_cls( + fake_pred, fake_label, weight=fake_weight, ignore_index=255) + if use_sigmoid: + fake_label, weight, valid_mask = _expand_onehot_labels( + labels=fake_label, + label_weights=None, + target_shape=fake_pred.shape, + ignore_index=255) + torch_loss = 
torch.nn.functional.binary_cross_entropy_with_logits( + fake_pred, + fake_label.float(), + reduction='none', + weight=fake_weight.unsqueeze(1).expand(fake_pred.shape)) + if avg_non_ignore: + avg_factor = valid_mask.sum().item() + torch_loss = (torch_loss * weight).sum() / avg_factor + else: + torch_loss = (torch_loss * weight).mean() + assert torch.allclose(loss, torch_loss) + + # test loss with class weights from file + fake_pred = torch.Tensor([[100, -100]]) + fake_label = torch.Tensor([1]).long() + import os + import tempfile + + import mmcv + import numpy as np + tmp_file = tempfile.NamedTemporaryFile() + + mmcv.dump([0.8, 0.2], f'{tmp_file.name}.pkl', 'pkl') # from pkl file + loss_cls_cfg = dict( + type='CrossEntropyLoss', + use_sigmoid=False, + class_weight=f'{tmp_file.name}.pkl', + loss_weight=1.0, + loss_name='loss_ce') + loss_cls = build_loss(loss_cls_cfg) + assert torch.allclose(loss_cls(fake_pred, fake_label), torch.tensor(40.)) + + np.save(f'{tmp_file.name}.npy', np.array([0.8, 0.2])) # from npy file + loss_cls_cfg = dict( + type='CrossEntropyLoss', + use_sigmoid=False, + class_weight=f'{tmp_file.name}.npy', + loss_weight=1.0, + loss_name='loss_ce') + loss_cls = build_loss(loss_cls_cfg) + assert torch.allclose(loss_cls(fake_pred, fake_label), torch.tensor(40.)) + tmp_file.close() + os.remove(f'{tmp_file.name}.pkl') + os.remove(f'{tmp_file.name}.npy') + + loss_cls_cfg = dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0) + loss_cls = build_loss(loss_cls_cfg) + assert torch.allclose(loss_cls(fake_pred, fake_label), torch.tensor(200.)) + + # test `avg_non_ignore` without ignore index would not affect ce/bce loss + # when reduction='sum'/'none'/'mean' + loss_cls_cfg1 = dict( + type='CrossEntropyLoss', + use_sigmoid=use_sigmoid, + reduction=reduction, + loss_weight=1.0, + avg_non_ignore=True) + loss_cls1 = build_loss(loss_cls_cfg1) + loss_cls_cfg2 = dict( + type='CrossEntropyLoss', + use_sigmoid=use_sigmoid, + reduction=reduction, + loss_weight=1.0, + avg_non_ignore=False) + loss_cls2 = build_loss(loss_cls_cfg2) + assert torch.allclose( + loss_cls1(fake_pred, fake_label, ignore_index=255) / fake_pred.numel(), + loss_cls2(fake_pred, fake_label, ignore_index=255) / fake_pred.numel(), + atol=1e-4) + + # test ce/bce loss with ignore index and class weight + # in 5-way classification + if use_sigmoid: + # test bce loss when pred.shape == or != label.shape + if bce_input_same_dim: + fake_pred = torch.randn(2, 10).float() + fake_label = torch.rand(2, 10).float() + class_weight = torch.rand(2, 10) + else: + fake_pred = torch.full(size=(2, 21, 8, 8), fill_value=0.5) + fake_label = torch.ones(2, 8, 8).long() + class_weight = torch.randn(2, 21, 8, 8) + fake_label, weight, valid_mask = _expand_onehot_labels( + labels=fake_label, + label_weights=None, + target_shape=fake_pred.shape, + ignore_index=-100) + torch_loss = torch.nn.functional.binary_cross_entropy_with_logits( + fake_pred, + fake_label.float(), + reduction='mean', + pos_weight=class_weight) + else: + fake_pred = torch.randn(2, 5, 10).float() # 5-way classification + fake_label = torch.randint(0, 5, (2, 10)).long() + class_weight = torch.rand(5) + class_weight /= class_weight.sum() + torch_loss = torch.nn.functional.cross_entropy( + fake_pred, fake_label, reduction='sum', + weight=class_weight) / fake_label.numel() + loss_cls_cfg = dict( + type='CrossEntropyLoss', + use_sigmoid=use_sigmoid, + reduction='mean', + class_weight=class_weight, + loss_weight=1.0, + avg_non_ignore=avg_non_ignore) + loss_cls = 
build_loss(loss_cls_cfg) + + # test cross entropy loss has name `loss_ce` + assert loss_cls.loss_name == 'loss_ce' + # test avg_non_ignore is in extra_repr + assert loss_cls.extra_repr() == f'avg_non_ignore={avg_non_ignore}' + + loss = loss_cls(fake_pred, fake_label) + assert torch.allclose(loss, torch_loss) + + fake_label[0, [1, 2, 5, 7]] = 10 # set ignore_index + fake_label[1, [0, 5, 8, 9]] = 10 + loss = loss_cls(fake_pred, fake_label, ignore_index=10) + if use_sigmoid: + if avg_non_ignore: + torch_loss = torch.nn.functional.binary_cross_entropy_with_logits( + fake_pred[fake_label != 10], + fake_label[fake_label != 10].float(), + pos_weight=class_weight[fake_label != 10], + reduction='mean') + else: + torch_loss = torch.nn.functional.binary_cross_entropy_with_logits( + fake_pred[fake_label != 10], + fake_label[fake_label != 10].float(), + pos_weight=class_weight[fake_label != 10], + reduction='sum') / fake_label.numel() + else: + if avg_non_ignore: + torch_loss = torch.nn.functional.cross_entropy( + fake_pred, + fake_label, + ignore_index=10, + reduction='sum', + weight=class_weight) / fake_label[fake_label != 10].numel() + else: + torch_loss = torch.nn.functional.cross_entropy( + fake_pred, + fake_label, + ignore_index=10, + reduction='sum', + weight=class_weight) / fake_label.numel() + assert torch.allclose(loss, torch_loss) + + +@pytest.mark.parametrize('avg_non_ignore', [True, False]) +@pytest.mark.parametrize('with_weight', [True, False]) +def test_binary_class_ce_loss(avg_non_ignore, with_weight): + from mmseg.models import build_loss + + fake_pred = torch.rand(3, 1, 10, 10) + fake_label = torch.randint(0, 2, (3, 10, 10)) + fake_weight = torch.rand(3, 10, 10) + valid_mask = ((fake_label >= 0) & (fake_label != 255)).float() + weight = valid_mask + + torch_loss = torch.nn.functional.binary_cross_entropy_with_logits( + fake_pred, + fake_label.unsqueeze(1).float(), + reduction='none', + weight=fake_weight.unsqueeze(1).float() if with_weight else None) + if avg_non_ignore: + eps = torch.finfo(torch.float32).eps + avg_factor = valid_mask.sum().item() + torch_loss = (torch_loss * weight.unsqueeze(1)).sum() / ( + avg_factor + eps) + else: + torch_loss = (torch_loss * weight.unsqueeze(1)).mean() + + loss_cls_cfg = dict( + type='CrossEntropyLoss', + use_sigmoid=True, + loss_weight=1.0, + avg_non_ignore=avg_non_ignore, + reduction='mean', + loss_name='loss_ce') + loss_cls = build_loss(loss_cls_cfg) + loss = loss_cls( + fake_pred, + fake_label, + weight=fake_weight if with_weight else None, + ignore_index=255) + assert torch.allclose(loss, torch_loss) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_losses/test_dice_loss.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_losses/test_dice_loss.py new file mode 100644 index 0000000..3936f5d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_losses/test_dice_loss.py @@ -0,0 +1,78 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
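+# Note: `class_weight` accepts an inline list or a weights file path; the +# blocks below exercise both the '.pkl' and '.npy' file variants.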
+import torch + + +def test_dice_loss(): + from mmseg.models import build_loss + + # test dice loss with loss_type = 'multi_class' + loss_cfg = dict( + type='DiceLoss', + reduction='none', + class_weight=[1.0, 2.0, 3.0], + loss_weight=1.0, + ignore_index=1, + loss_name='loss_dice') + dice_loss = build_loss(loss_cfg) + logits = torch.rand(8, 3, 4, 4) + labels = (torch.rand(8, 4, 4) * 3).long() + dice_loss(logits, labels) + + # test loss with class weights from file + import os + import tempfile + + import mmcv + import numpy as np + tmp_file = tempfile.NamedTemporaryFile() + + mmcv.dump([1.0, 2.0, 3.0], f'{tmp_file.name}.pkl', 'pkl') # from pkl file + loss_cfg = dict( + type='DiceLoss', + reduction='none', + class_weight=f'{tmp_file.name}.pkl', + loss_weight=1.0, + ignore_index=1, + loss_name='loss_dice') + dice_loss = build_loss(loss_cfg) + dice_loss(logits, labels, ignore_index=None) + + np.save(f'{tmp_file.name}.npy', np.array([1.0, 2.0, 3.0])) # from npy file + loss_cfg = dict( + type='DiceLoss', + reduction='none', + class_weight=f'{tmp_file.name}.npy', + loss_weight=1.0, + ignore_index=1, + loss_name='loss_dice') + dice_loss = build_loss(loss_cfg) + dice_loss(logits, labels, ignore_index=None) + tmp_file.close() + os.remove(f'{tmp_file.name}.pkl') + os.remove(f'{tmp_file.name}.npy') + + # test dice loss with loss_type = 'binary' + loss_cfg = dict( + type='DiceLoss', + smooth=2, + exponent=3, + reduction='sum', + loss_weight=1.0, + ignore_index=0, + loss_name='loss_dice') + dice_loss = build_loss(loss_cfg) + logits = torch.rand(8, 2, 4, 4) + labels = (torch.rand(8, 4, 4) * 2).long() + dice_loss(logits, labels) + + # test dice loss has name `loss_dice` + loss_cfg = dict( + type='DiceLoss', + smooth=2, + exponent=3, + reduction='sum', + loss_weight=1.0, + ignore_index=0, + loss_name='loss_dice') + dice_loss = build_loss(loss_cfg) + assert dice_loss.loss_name == 'loss_dice' diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_losses/test_focal_loss.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_losses/test_focal_loss.py new file mode 100644 index 0000000..687312b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_losses/test_focal_loss.py @@ -0,0 +1,216 @@ +# Copyright (c) OpenMMLab. All rights reserved.
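+# Note: FocalLoss in mmseg implements only the sigmoid variant, so building it +# with use_sigmoid=False raises an AssertionError (see test_use_sigmoid below).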
+import pytest +import torch +import torch.nn.functional as F + +from mmseg.models import build_loss + + +# test focal loss with use_sigmoid=False +def test_use_sigmoid(): + # can't init with use_sigmoid=False + with pytest.raises(AssertionError): + loss_cfg = dict(type='FocalLoss', use_sigmoid=False) + build_loss(loss_cfg) + + # can't forward with use_sigmoid=False + with pytest.raises(NotImplementedError): + loss_cfg = dict(type='FocalLoss', use_sigmoid=True) + focal_loss = build_loss(loss_cfg) + focal_loss.use_sigmoid = False + fake_pred = torch.rand(3, 4, 5, 6) + fake_target = torch.randint(0, 4, (3, 5, 6)) + focal_loss(fake_pred, fake_target) + + +# reduction type must be 'none', 'mean' or 'sum' +def test_wrong_reduction_type(): + # can't init with wrong reduction + with pytest.raises(AssertionError): + loss_cfg = dict(type='FocalLoss', reduction='test') + build_loss(loss_cfg) + + # can't forward with wrong reduction override + with pytest.raises(AssertionError): + loss_cfg = dict(type='FocalLoss') + focal_loss = build_loss(loss_cfg) + fake_pred = torch.rand(3, 4, 5, 6) + fake_target = torch.randint(0, 4, (3, 5, 6)) + focal_loss(fake_pred, fake_target, reduction_override='test') + + +# test focal loss can handle input parameters with +# unacceptable types +def test_unacceptable_parameters(): + with pytest.raises(AssertionError): + loss_cfg = dict(type='FocalLoss', gamma='test') + build_loss(loss_cfg) + with pytest.raises(AssertionError): + loss_cfg = dict(type='FocalLoss', alpha='test') + build_loss(loss_cfg) + with pytest.raises(AssertionError): + loss_cfg = dict(type='FocalLoss', class_weight='test') + build_loss(loss_cfg) + with pytest.raises(AssertionError): + loss_cfg = dict(type='FocalLoss', loss_weight='test') + build_loss(loss_cfg) + with pytest.raises(AssertionError): + loss_cfg = dict(type='FocalLoss', loss_name=123) + build_loss(loss_cfg) + + +# test if focal loss can be correctly initialized +def test_init_focal_loss(): + loss_cfg = dict( + type='FocalLoss', + use_sigmoid=True, + gamma=3.0, + alpha=3.0, + class_weight=[1, 2, 3, 4], + reduction='sum') + focal_loss = build_loss(loss_cfg) + assert focal_loss.use_sigmoid is True + assert focal_loss.gamma == 3.0 + assert focal_loss.alpha == 3.0 + assert focal_loss.reduction == 'sum' + assert focal_loss.class_weight == [1, 2, 3, 4] + assert focal_loss.loss_weight == 1.0 + assert focal_loss.loss_name == 'loss_focal' + + +# test reduction override +def test_reduction_override(): + loss_cfg = dict(type='FocalLoss', reduction='mean') + focal_loss = build_loss(loss_cfg) + fake_pred = torch.rand(3, 4, 5, 6) + fake_target = torch.randint(0, 4, (3, 5, 6)) + loss = focal_loss(fake_pred, fake_target, reduction_override='none') + assert loss.shape == fake_pred.shape + + +# test wrong pred and target shape +def test_wrong_pred_and_target_shape(): + loss_cfg = dict(type='FocalLoss') + focal_loss = build_loss(loss_cfg) + fake_pred = torch.rand(3, 4, 5, 6) + fake_target = torch.randint(0, 4, (3, 2, 2)) + fake_target = F.one_hot(fake_target, num_classes=4) + fake_target = fake_target.permute(0, 3, 1, 2) + with pytest.raises(AssertionError): + focal_loss(fake_pred, fake_target) + + +# test forward with different shape of target +def test_forward_with_different_shape_of_target(): + loss_cfg = dict(type='FocalLoss') + focal_loss = build_loss(loss_cfg) + + fake_pred = torch.rand(3, 4, 5, 6) + fake_target = torch.randint(0, 4, (3, 5, 6)) + loss1 = focal_loss(fake_pred, fake_target) + + fake_target = F.one_hot(fake_target, num_classes=4) + fake_target =
fake_target.permute(0, 3, 1, 2) + loss2 = focal_loss(fake_pred, fake_target) + assert loss1 == loss2 + + +# test forward with weight +def test_forward_with_weight(): + loss_cfg = dict(type='FocalLoss') + focal_loss = build_loss(loss_cfg) + fake_pred = torch.rand(3, 4, 5, 6) + fake_target = torch.randint(0, 4, (3, 5, 6)) + weight = torch.rand(3 * 5 * 6, 1) + loss1 = focal_loss(fake_pred, fake_target, weight=weight) + + weight2 = weight.view(-1) + loss2 = focal_loss(fake_pred, fake_target, weight=weight2) + + weight3 = weight.expand(3 * 5 * 6, 4) + loss3 = focal_loss(fake_pred, fake_target, weight=weight3) + assert loss1 == loss2 == loss3 + + +# test none reduction type +def test_none_reduction_type(): + loss_cfg = dict(type='FocalLoss', reduction='none') + focal_loss = build_loss(loss_cfg) + fake_pred = torch.rand(3, 4, 5, 6) + fake_target = torch.randint(0, 4, (3, 5, 6)) + loss = focal_loss(fake_pred, fake_target) + assert loss.shape == fake_pred.shape + + +# test the usage of class weight +def test_class_weight(): + loss_cfg_cw = dict( + type='FocalLoss', reduction='none', class_weight=[1.0, 2.0, 3.0, 4.0]) + loss_cfg = dict(type='FocalLoss', reduction='none') + focal_loss_cw = build_loss(loss_cfg_cw) + focal_loss = build_loss(loss_cfg) + fake_pred = torch.rand(3, 4, 5, 6) + fake_target = torch.randint(0, 4, (3, 5, 6)) + loss_cw = focal_loss_cw(fake_pred, fake_target) + loss = focal_loss(fake_pred, fake_target) + weight = torch.tensor([1, 2, 3, 4]).view(1, 4, 1, 1) + assert (loss * weight == loss_cw).all() + + +# test ignore index +def test_ignore_index(): + loss_cfg = dict(type='FocalLoss', reduction='none') + # ignore_index within C classes + focal_loss = build_loss(loss_cfg) + fake_pred = torch.rand(3, 5, 5, 6) + fake_target = torch.randint(0, 4, (3, 5, 6)) + dim1 = torch.randint(0, 3, (4, )) + dim2 = torch.randint(0, 5, (4, )) + dim3 = torch.randint(0, 6, (4, )) + fake_target[dim1, dim2, dim3] = 4 + loss1 = focal_loss(fake_pred, fake_target, ignore_index=4) + one_hot_target = F.one_hot(fake_target, num_classes=5) + one_hot_target = one_hot_target.permute(0, 3, 1, 2) + loss2 = focal_loss(fake_pred, one_hot_target, ignore_index=4) + assert (loss1 == loss2).all() + assert (loss1[dim1, :, dim2, dim3] == 0).all() + assert (loss2[dim1, :, dim2, dim3] == 0).all() + + fake_pred = torch.rand(3, 4, 5, 6) + fake_target = torch.randint(0, 4, (3, 5, 6)) + loss1 = focal_loss(fake_pred, fake_target, ignore_index=2) + one_hot_target = F.one_hot(fake_target, num_classes=4) + one_hot_target = one_hot_target.permute(0, 3, 1, 2) + loss2 = focal_loss(fake_pred, one_hot_target, ignore_index=2) + ignore_mask = one_hot_target == 2 + assert (loss1 == loss2).all() + assert torch.sum(loss1 * ignore_mask) == 0 + assert torch.sum(loss2 * ignore_mask) == 0 + + # ignore index is not in prediction's classes + fake_pred = torch.rand(3, 4, 5, 6) + fake_target = torch.randint(0, 4, (3, 5, 6)) + dim1 = torch.randint(0, 3, (4, )) + dim2 = torch.randint(0, 5, (4, )) + dim3 = torch.randint(0, 6, (4, )) + fake_target[dim1, dim2, dim3] = 255 + loss1 = focal_loss(fake_pred, fake_target, ignore_index=255) + assert (loss1[dim1, :, dim2, dim3] == 0).all() + + +# test list alpha +def test_alpha(): + loss_cfg = dict(type='FocalLoss') + focal_loss = build_loss(loss_cfg) + alpha_float = 0.4 + alpha = [0.4, 0.4, 0.4, 0.4] + alpha2 = [0.1, 0.3, 0.2, 0.1] + fake_pred = torch.rand(3, 4, 5, 6) + fake_target = torch.randint(0, 4, (3, 5, 6)) + focal_loss.alpha = alpha_float + loss1 = focal_loss(fake_pred, fake_target) + focal_loss.alpha = 
alpha + loss2 = focal_loss(fake_pred, fake_target) + assert loss1 == loss2 + focal_loss.alpha = alpha2 + focal_loss(fake_pred, fake_target) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_losses/test_lovasz_loss.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_losses/test_lovasz_loss.py new file mode 100644 index 0000000..bea3f4b --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_losses/test_lovasz_loss.py @@ -0,0 +1,118 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pytest +import torch + + +def test_lovasz_loss(): + from mmseg.models import build_loss + + # loss_type should be 'binary' or 'multi_class' + with pytest.raises(AssertionError): + loss_cfg = dict( + type='LovaszLoss', + loss_type='Binary', + reduction='none', + loss_weight=1.0, + loss_name='loss_lovasz') + build_loss(loss_cfg) + + # reduction should be 'none' when per_image is False. + with pytest.raises(AssertionError): + loss_cfg = dict( + type='LovaszLoss', + loss_type='multi_class', + loss_name='loss_lovasz') + build_loss(loss_cfg) + + # test lovasz loss with loss_type = 'multi_class' and per_image = False + loss_cfg = dict( + type='LovaszLoss', + reduction='none', + loss_weight=1.0, + loss_name='loss_lovasz') + lovasz_loss = build_loss(loss_cfg) + logits = torch.rand(1, 3, 4, 4) + labels = (torch.rand(1, 4, 4) * 2).long() + lovasz_loss(logits, labels) + + # test lovasz loss with loss_type = 'multi_class' and per_image = True + loss_cfg = dict( + type='LovaszLoss', + per_image=True, + reduction='mean', + class_weight=[1.0, 2.0, 3.0], + loss_weight=1.0, + loss_name='loss_lovasz') + lovasz_loss = build_loss(loss_cfg) + logits = torch.rand(1, 3, 4, 4) + labels = (torch.rand(1, 4, 4) * 2).long() + lovasz_loss(logits, labels, ignore_index=None) + + # test loss with class weights from file + import os + import tempfile + + import mmcv + import numpy as np + tmp_file = tempfile.NamedTemporaryFile() + + mmcv.dump([1.0, 2.0, 3.0], f'{tmp_file.name}.pkl', 'pkl') # from pkl file + loss_cfg = dict( + type='LovaszLoss', + per_image=True, + reduction='mean', + class_weight=f'{tmp_file.name}.pkl', + loss_weight=1.0, + loss_name='loss_lovasz') + lovasz_loss = build_loss(loss_cfg) + lovasz_loss(logits, labels, ignore_index=None) + + np.save(f'{tmp_file.name}.npy', np.array([1.0, 2.0, 3.0])) # from npy file + loss_cfg = dict( + type='LovaszLoss', + per_image=True, + reduction='mean', + class_weight=f'{tmp_file.name}.npy', + loss_weight=1.0, + loss_name='loss_lovasz') + lovasz_loss = build_loss(loss_cfg) + lovasz_loss(logits, labels, ignore_index=None) + tmp_file.close() + os.remove(f'{tmp_file.name}.pkl') + os.remove(f'{tmp_file.name}.npy') + + # test lovasz loss with loss_type = 'binary' and per_image = False + loss_cfg = dict( + type='LovaszLoss', + loss_type='binary', + reduction='none', + loss_weight=1.0, + loss_name='loss_lovasz') + lovasz_loss = build_loss(loss_cfg) + logits = torch.rand(2, 4, 4) + labels = (torch.rand(2, 4, 4)).long() + lovasz_loss(logits, labels) + + # test lovasz loss with loss_type = 'binary' and per_image = True + loss_cfg = dict( + type='LovaszLoss', + loss_type='binary', + per_image=True, + reduction='mean', + loss_weight=1.0, + loss_name='loss_lovasz') + lovasz_loss = build_loss(loss_cfg) + logits = torch.rand(2, 4, 4) + labels = (torch.rand(2, 4, 4)).long() + lovasz_loss(logits, labels, ignore_index=None) + + # test lovasz loss has name `loss_lovasz` + loss_cfg = dict( + type='LovaszLoss', + 
loss_type='binary', + per_image=True, + reduction='mean', + loss_weight=1.0, + loss_name='loss_lovasz') + lovasz_loss = build_loss(loss_cfg) + assert lovasz_loss.loss_name == 'loss_lovasz' diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_losses/test_utils.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_losses/test_utils.py new file mode 100644 index 0000000..ab9927f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_losses/test_utils.py @@ -0,0 +1,129 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import numpy as np +import pytest +import torch + +from mmseg.models.losses import Accuracy, reduce_loss, weight_reduce_loss + + +def test_weight_reduce_loss(): + loss = torch.rand(1, 3, 4, 4) + weight = torch.zeros(1, 3, 4, 4) + weight[:, :, :2, :2] = 1 + + # test reduce_loss() + reduced = reduce_loss(loss, 'none') + assert reduced is loss + + reduced = reduce_loss(loss, 'mean') + np.testing.assert_almost_equal(reduced.numpy(), loss.mean()) + + reduced = reduce_loss(loss, 'sum') + np.testing.assert_almost_equal(reduced.numpy(), loss.sum()) + + # test weight_reduce_loss() + reduced = weight_reduce_loss(loss, weight=None, reduction='none') + assert reduced is loss + + reduced = weight_reduce_loss(loss, weight=weight, reduction='mean') + target = (loss * weight).mean() + np.testing.assert_almost_equal(reduced.numpy(), target) + + reduced = weight_reduce_loss(loss, weight=weight, reduction='sum') + np.testing.assert_almost_equal(reduced.numpy(), (loss * weight).sum()) + + with pytest.raises(AssertionError): + weight_wrong = weight[0, 0, ...] + weight_reduce_loss(loss, weight=weight_wrong, reduction='mean') + + with pytest.raises(AssertionError): + weight_wrong = weight[:, 0:2, ...] 
+ weight_reduce_loss(loss, weight=weight_wrong, reduction='mean') + + +def test_accuracy(): + # test for empty pred + pred = torch.empty(0, 4) + label = torch.empty(0) + accuracy = Accuracy(topk=1) + acc = accuracy(pred, label) + assert acc.item() == 0 + + pred = torch.Tensor([[0.2, 0.3, 0.6, 0.5], [0.1, 0.1, 0.2, 0.6], + [0.9, 0.0, 0.0, 0.1], [0.4, 0.7, 0.1, 0.1], + [0.0, 0.0, 0.99, 0]]) + # test for ignore_index + true_label = torch.Tensor([2, 3, 0, 1, 2]).long() + accuracy = Accuracy(topk=1, ignore_index=None) + acc = accuracy(pred, true_label) + assert torch.allclose(acc, torch.tensor(100.0)) + + # test for ignore_index with a wrong prediction of that index + true_label = torch.Tensor([2, 3, 1, 1, 2]).long() + accuracy = Accuracy(topk=1, ignore_index=1) + acc = accuracy(pred, true_label) + assert torch.allclose(acc, torch.tensor(100.0)) + + # test for ignore_index 1 with a wrong prediction of other index + true_label = torch.Tensor([2, 0, 0, 1, 2]).long() + accuracy = Accuracy(topk=1, ignore_index=1) + acc = accuracy(pred, true_label) + assert torch.allclose(acc, torch.tensor(75.0)) + + # test for ignore_index 4 with a wrong prediction of other index + true_label = torch.Tensor([2, 0, 0, 1, 2]).long() + accuracy = Accuracy(topk=1, ignore_index=4) + acc = accuracy(pred, true_label) + assert torch.allclose(acc, torch.tensor(80.0)) + + # test for ignoring all the pixels + true_label = torch.Tensor([2, 2, 2, 2, 2]).long() + accuracy = Accuracy(topk=1, ignore_index=2) + acc = accuracy(pred, true_label) + assert torch.allclose(acc, torch.tensor(100.0)) + + # test for top1 + true_label = torch.Tensor([2, 3, 0, 1, 2]).long() + accuracy = Accuracy(topk=1) + acc = accuracy(pred, true_label) + assert torch.allclose(acc, torch.tensor(100.0)) + + # test for top1 with score thresh=0.8 + true_label = torch.Tensor([2, 3, 0, 1, 2]).long() + accuracy = Accuracy(topk=1, thresh=0.8) + acc = accuracy(pred, true_label) + assert torch.allclose(acc, torch.tensor(40.0)) + + # test for top2 + accuracy = Accuracy(topk=2) + label = torch.Tensor([3, 2, 0, 0, 2]).long() + acc = accuracy(pred, label) + assert torch.allclose(acc, torch.tensor(100.0)) + + # test for both top1 and top2 + accuracy = Accuracy(topk=(1, 2)) + true_label = torch.Tensor([2, 3, 0, 1, 2]).long() + acc = accuracy(pred, true_label) + for a in acc: + assert torch.allclose(a, torch.tensor(100.0)) + + # topk is larger than pred class number + with pytest.raises(AssertionError): + accuracy = Accuracy(topk=5) + accuracy(pred, true_label) + + # wrong topk type + with pytest.raises(AssertionError): + accuracy = Accuracy(topk='wrong type') + accuracy(pred, true_label) + + # label size is larger than required + with pytest.raises(AssertionError): + label = torch.Tensor([2, 3, 0, 1, 2, 0]).long() # size mismatch + accuracy = Accuracy() + accuracy(pred, label) + + # wrong pred dimension + with pytest.raises(AssertionError): + accuracy = Accuracy() + accuracy(pred[:, :, None], true_label) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_necks/__init__.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_necks/__init__.py new file mode 100644 index 0000000..ef101fe --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_necks/__init__.py @@ -0,0 +1 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_necks/test_feature2pyramid.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_necks/test_feature2pyramid.py new file mode 100644 index 0000000..44fd02c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_necks/test_feature2pyramid.py @@ -0,0 +1,38 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pytest +import torch + +from mmseg.models import Feature2Pyramid + + +def test_feature2pyramid(): + # test + rescales = [4, 2, 1, 0.5] + embed_dim = 64 + inputs = [torch.randn(1, embed_dim, 32, 32) for i in range(len(rescales))] + + fpn = Feature2Pyramid( + embed_dim, rescales, norm_cfg=dict(type='BN', requires_grad=True)) + outputs = fpn(inputs) + assert outputs[0].shape == torch.Size([1, 64, 128, 128]) + assert outputs[1].shape == torch.Size([1, 64, 64, 64]) + assert outputs[2].shape == torch.Size([1, 64, 32, 32]) + assert outputs[3].shape == torch.Size([1, 64, 16, 16]) + + # test rescales = [2, 1, 0.5, 0.25] + rescales = [2, 1, 0.5, 0.25] + inputs = [torch.randn(1, embed_dim, 32, 32) for i in range(len(rescales))] + + fpn = Feature2Pyramid( + embed_dim, rescales, norm_cfg=dict(type='BN', requires_grad=True)) + outputs = fpn(inputs) + assert outputs[0].shape == torch.Size([1, 64, 64, 64]) + assert outputs[1].shape == torch.Size([1, 64, 32, 32]) + assert outputs[2].shape == torch.Size([1, 64, 16, 16]) + assert outputs[3].shape == torch.Size([1, 64, 8, 8]) + + # test rescales = [4, 2, 0.25, 0] + rescales = [4, 2, 0.25, 0] + with pytest.raises(KeyError): + fpn = Feature2Pyramid( + embed_dim, rescales, norm_cfg=dict(type='BN', requires_grad=True)) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_necks/test_fpn.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_necks/test_fpn.py new file mode 100644 index 0000000..c294006 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_necks/test_fpn.py @@ -0,0 +1,30 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch + +from mmseg.models import FPN + + +def test_fpn(): + in_channels = [64, 128, 256, 512] + inputs = [ + torch.randn(1, c, 56 // 2**i, 56 // 2**i) + for i, c in enumerate(in_channels) + ] + + fpn = FPN(in_channels, 64, len(in_channels)) + outputs = fpn(inputs) + assert outputs[0].shape == torch.Size([1, 64, 56, 56]) + assert outputs[1].shape == torch.Size([1, 64, 28, 28]) + assert outputs[2].shape == torch.Size([1, 64, 14, 14]) + assert outputs[3].shape == torch.Size([1, 64, 7, 7]) + + fpn = FPN( + in_channels, + 64, + len(in_channels), + upsample_cfg=dict(mode='nearest', scale_factor=2.0)) + outputs = fpn(inputs) + assert outputs[0].shape == torch.Size([1, 64, 56, 56]) + assert outputs[1].shape == torch.Size([1, 64, 28, 28]) + assert outputs[2].shape == torch.Size([1, 64, 14, 14]) + assert outputs[3].shape == torch.Size([1, 64, 7, 7]) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_necks/test_ic_neck.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_necks/test_ic_neck.py new file mode 100644 index 0000000..3d13008 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_necks/test_ic_neck.py @@ -0,0 +1,53 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
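+# Note: ICNeck's forward returns three maps (the two cascade-fusion branches +# used by the auxiliary heads and the fused decode-head input); the last two +# share the resolution of the largest input, so their shape asserts coincide.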
+import pytest +import torch + +from mmseg.models.necks import ICNeck +from mmseg.models.necks.ic_neck import CascadeFeatureFusion +from ..test_heads.utils import _conv_has_norm, to_cuda + + +def test_ic_neck(): + # test with norm_cfg + neck = ICNeck( + in_channels=(4, 16, 16), + out_channels=8, + norm_cfg=dict(type='SyncBN'), + align_corners=False) + assert _conv_has_norm(neck, sync_bn=True) + + inputs = [ + torch.randn(1, 4, 32, 64), + torch.randn(1, 16, 16, 32), + torch.randn(1, 16, 8, 16) + ] + neck = ICNeck( + in_channels=(4, 16, 16), + out_channels=4, + norm_cfg=dict(type='BN', requires_grad=True), + align_corners=False) + if torch.cuda.is_available(): + neck, inputs = to_cuda(neck, inputs) + + outputs = neck(inputs) + assert outputs[0].shape == (1, 4, 16, 32) + assert outputs[1].shape == (1, 4, 32, 64) + assert outputs[2].shape == (1, 4, 32, 64) + + +def test_ic_neck_cascade_feature_fusion(): + cff = CascadeFeatureFusion(64, 64, 32) + assert cff.conv_low.in_channels == 64 + assert cff.conv_low.out_channels == 32 + assert cff.conv_high.in_channels == 64 + assert cff.conv_high.out_channels == 32 + + +def test_ic_neck_input_channels(): + with pytest.raises(AssertionError): + # ICNet Neck input channel constraints. + ICNeck( + in_channels=(16, 64, 64, 64), + out_channels=32, + norm_cfg=dict(type='BN', requires_grad=True), + align_corners=False) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_necks/test_jpu.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_necks/test_jpu.py new file mode 100644 index 0000000..4c3fa9f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_necks/test_jpu.py @@ -0,0 +1,46 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pytest +import torch + +from mmseg.models.necks import JPU + + +def test_fastfcn_neck(): + # Test FastFCN Standard Forward + model = JPU( + in_channels=(64, 128, 256), + mid_channels=64, + start_level=0, + end_level=-1, + dilations=(1, 2, 4, 8), + ) + model.init_weights() + model.train() + batch_size = 1 + input = [ + torch.randn(batch_size, 64, 64, 128), + torch.randn(batch_size, 128, 32, 64), + torch.randn(batch_size, 256, 16, 32) + ] + feat = model(input) + + assert len(feat) == 3 + assert feat[0].shape == torch.Size([batch_size, 64, 64, 128]) + assert feat[1].shape == torch.Size([batch_size, 128, 32, 64]) + assert feat[2].shape == torch.Size([batch_size, 256, 64, 128]) + + with pytest.raises(AssertionError): + # FastFCN input and in_channels constraints. + JPU(in_channels=(256, 64, 128), start_level=0, end_level=5) + + # Test non-default start_level + model = JPU(in_channels=(64, 128, 256), start_level=1, end_level=-1) + input = [ + torch.randn(batch_size, 64, 64, 128), + torch.randn(batch_size, 128, 32, 64), + torch.randn(batch_size, 256, 16, 32) + ] + feat = model(input) + assert len(feat) == 2 + assert feat[0].shape == torch.Size([batch_size, 128, 32, 64]) + assert feat[1].shape == torch.Size([batch_size, 2048, 32, 64]) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_necks/test_mla_neck.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_necks/test_mla_neck.py new file mode 100644 index 0000000..e385418 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_necks/test_mla_neck.py @@ -0,0 +1,16 @@ +# Copyright (c) OpenMMLab. All rights reserved.
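+# Note: MLANeck only remaps channels (4 -> 32 here) and keeps spatial size, so +# all four outputs below stay 12x12.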
+import torch + +from mmseg.models import MLANeck + + +def test_mla(): + in_channels = [4, 4, 4, 4] + mla = MLANeck(in_channels, 32) + + inputs = [torch.randn(1, c, 12, 12) for i, c in enumerate(in_channels)] + outputs = mla(inputs) + assert outputs[0].shape == torch.Size([1, 32, 12, 12]) + assert outputs[1].shape == torch.Size([1, 32, 12, 12]) + assert outputs[2].shape == torch.Size([1, 32, 12, 12]) + assert outputs[3].shape == torch.Size([1, 32, 12, 12]) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_necks/test_multilevel_neck.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_necks/test_multilevel_neck.py new file mode 100644 index 0000000..9c71d51 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_necks/test_multilevel_neck.py @@ -0,0 +1,32 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch + +from mmseg.models import MultiLevelNeck + + +def test_multilevel_neck(): + + # Test init_weights + MultiLevelNeck([266], 32).init_weights() + + # Test multi feature maps + in_channels = [32, 64, 128, 256] + inputs = [torch.randn(1, c, 14, 14) for i, c in enumerate(in_channels)] + + neck = MultiLevelNeck(in_channels, 32) + outputs = neck(inputs) + assert outputs[0].shape == torch.Size([1, 32, 7, 7]) + assert outputs[1].shape == torch.Size([1, 32, 14, 14]) + assert outputs[2].shape == torch.Size([1, 32, 28, 28]) + assert outputs[3].shape == torch.Size([1, 32, 56, 56]) + + # Test one feature map + in_channels = [768] + inputs = [torch.randn(1, 768, 14, 14)] + + neck = MultiLevelNeck(in_channels, 32) + outputs = neck(inputs) + assert outputs[0].shape == torch.Size([1, 32, 7, 7]) + assert outputs[1].shape == torch.Size([1, 32, 14, 14]) + assert outputs[2].shape == torch.Size([1, 32, 28, 28]) + assert outputs[3].shape == torch.Size([1, 32, 56, 56]) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_segmentors/__init__.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_segmentors/__init__.py new file mode 100644 index 0000000..ef101fe --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_segmentors/__init__.py @@ -0,0 +1 @@ +# Copyright (c) OpenMMLab. All rights reserved. diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_segmentors/test_cascade_encoder_decoder.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_segmentors/test_cascade_encoder_decoder.py new file mode 100644 index 0000000..07ad5c3 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_segmentors/test_cascade_encoder_decoder.py @@ -0,0 +1,57 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from mmcv import ConfigDict + +from mmseg.models import build_segmentor +from .utils import _segmentor_forward_train_test + + +def test_cascade_encoder_decoder(): + + # test 1 decode head, w.o. 
aux head + cfg = ConfigDict( + type='CascadeEncoderDecoder', + num_stages=2, + backbone=dict(type='ExampleBackbone'), + decode_head=[ + dict(type='ExampleDecodeHead'), + dict(type='ExampleCascadeDecodeHead') + ]) + cfg.test_cfg = ConfigDict(mode='whole') + segmentor = build_segmentor(cfg) + _segmentor_forward_train_test(segmentor) + + # test slide mode + cfg.test_cfg = ConfigDict(mode='slide', crop_size=(3, 3), stride=(2, 2)) + segmentor = build_segmentor(cfg) + _segmentor_forward_train_test(segmentor) + + # test 1 decode head, 1 aux head + cfg = ConfigDict( + type='CascadeEncoderDecoder', + num_stages=2, + backbone=dict(type='ExampleBackbone'), + decode_head=[ + dict(type='ExampleDecodeHead'), + dict(type='ExampleCascadeDecodeHead') + ], + auxiliary_head=dict(type='ExampleDecodeHead')) + cfg.test_cfg = ConfigDict(mode='whole') + segmentor = build_segmentor(cfg) + _segmentor_forward_train_test(segmentor) + + # test 1 decode head, 2 aux head + cfg = ConfigDict( + type='CascadeEncoderDecoder', + num_stages=2, + backbone=dict(type='ExampleBackbone'), + decode_head=[ + dict(type='ExampleDecodeHead'), + dict(type='ExampleCascadeDecodeHead') + ], + auxiliary_head=[ + dict(type='ExampleDecodeHead'), + dict(type='ExampleDecodeHead') + ]) + cfg.test_cfg = ConfigDict(mode='whole') + segmentor = build_segmentor(cfg) + _segmentor_forward_train_test(segmentor) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_segmentors/test_encoder_decoder.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_segmentors/test_encoder_decoder.py new file mode 100644 index 0000000..4ed1437 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_segmentors/test_encoder_decoder.py @@ -0,0 +1,47 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from mmcv import ConfigDict + +from mmseg.models import build_segmentor +from .utils import _segmentor_forward_train_test + + +def test_encoder_decoder(): + + # test 1 decode head, w.o. 
aux head + + cfg = ConfigDict( + type='EncoderDecoder', + backbone=dict(type='ExampleBackbone'), + decode_head=dict(type='ExampleDecodeHead'), + train_cfg=None, + test_cfg=dict(mode='whole')) + segmentor = build_segmentor(cfg) + _segmentor_forward_train_test(segmentor) + + # test slide mode + cfg.test_cfg = ConfigDict(mode='slide', crop_size=(3, 3), stride=(2, 2)) + segmentor = build_segmentor(cfg) + _segmentor_forward_train_test(segmentor) + + # test 1 decode head, 1 aux head + cfg = ConfigDict( + type='EncoderDecoder', + backbone=dict(type='ExampleBackbone'), + decode_head=dict(type='ExampleDecodeHead'), + auxiliary_head=dict(type='ExampleDecodeHead')) + cfg.test_cfg = ConfigDict(mode='whole') + segmentor = build_segmentor(cfg) + _segmentor_forward_train_test(segmentor) + + # test 1 decode head, 2 aux head + cfg = ConfigDict( + type='EncoderDecoder', + backbone=dict(type='ExampleBackbone'), + decode_head=dict(type='ExampleDecodeHead'), + auxiliary_head=[ + dict(type='ExampleDecodeHead'), + dict(type='ExampleDecodeHead') + ]) + cfg.test_cfg = ConfigDict(mode='whole') + segmentor = build_segmentor(cfg) + _segmentor_forward_train_test(segmentor) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_segmentors/utils.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_segmentors/utils.py new file mode 100644 index 0000000..1826dbf --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_segmentors/utils.py @@ -0,0 +1,140 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import numpy as np +import torch +from torch import nn + +from mmseg.models import BACKBONES, HEADS +from mmseg.models.decode_heads.cascade_decode_head import BaseCascadeDecodeHead +from mmseg.models.decode_heads.decode_head import BaseDecodeHead + + +def _demo_mm_inputs(input_shape=(1, 3, 8, 16), num_classes=10): + """Create a superset of inputs needed to run test or train batches. 
+ + Args: + input_shape (tuple): + input batch dimensions + + num_classes (int): + number of semantic classes + """ + (N, C, H, W) = input_shape + + rng = np.random.RandomState(0) + + imgs = rng.rand(*input_shape) + segs = rng.randint( + low=0, high=num_classes - 1, size=(N, 1, H, W)).astype(np.uint8) + + img_metas = [{ + 'img_shape': (H, W, C), + 'ori_shape': (H, W, C), + 'pad_shape': (H, W, C), + 'filename': '.png', + 'scale_factor': 1.0, + 'flip': False, + 'flip_direction': 'horizontal' + } for _ in range(N)] + + mm_inputs = { + 'imgs': torch.FloatTensor(imgs), + 'img_metas': img_metas, + 'gt_semantic_seg': torch.LongTensor(segs) + } + return mm_inputs + + +@BACKBONES.register_module() +class ExampleBackbone(nn.Module): + + def __init__(self): + super(ExampleBackbone, self).__init__() + self.conv = nn.Conv2d(3, 3, 3) + + def init_weights(self, pretrained=None): + pass + + def forward(self, x): + return [self.conv(x)] + + +@HEADS.register_module() +class ExampleDecodeHead(BaseDecodeHead): + + def __init__(self): + super(ExampleDecodeHead, self).__init__(3, 3, num_classes=19) + + def forward(self, inputs): + return self.cls_seg(inputs[0]) + + +@HEADS.register_module() +class ExampleCascadeDecodeHead(BaseCascadeDecodeHead): + + def __init__(self): + super(ExampleCascadeDecodeHead, self).__init__(3, 3, num_classes=19) + + def forward(self, inputs, prev_out): + return self.cls_seg(inputs[0]) + + +def _segmentor_forward_train_test(segmentor): + if isinstance(segmentor.decode_head, nn.ModuleList): + num_classes = segmentor.decode_head[-1].num_classes + else: + num_classes = segmentor.decode_head.num_classes + # batch_size=2 for BatchNorm + mm_inputs = _demo_mm_inputs(num_classes=num_classes) + + imgs = mm_inputs.pop('imgs') + img_metas = mm_inputs.pop('img_metas') + gt_semantic_seg = mm_inputs['gt_semantic_seg'] + + # convert to cuda Tensor if applicable + if torch.cuda.is_available(): + segmentor = segmentor.cuda() + imgs = imgs.cuda() + gt_semantic_seg = gt_semantic_seg.cuda() + + # Test forward train + losses = segmentor.forward( + imgs, img_metas, gt_semantic_seg=gt_semantic_seg, return_loss=True) + assert isinstance(losses, dict) + + # Test train_step + data_batch = dict( + img=imgs, img_metas=img_metas, gt_semantic_seg=gt_semantic_seg) + outputs = segmentor.train_step(data_batch, None) + assert isinstance(outputs, dict) + assert 'loss' in outputs + assert 'log_vars' in outputs + assert 'num_samples' in outputs + + # Test val_step + with torch.no_grad(): + segmentor.eval() + data_batch = dict( + img=imgs, img_metas=img_metas, gt_semantic_seg=gt_semantic_seg) + outputs = segmentor.val_step(data_batch, None) + assert isinstance(outputs, dict) + assert 'loss' in outputs + assert 'log_vars' in outputs + assert 'num_samples' in outputs + + # Test forward simple test + with torch.no_grad(): + segmentor.eval() + # pack into lists + img_list = [img[None, :] for img in imgs] + img_meta_list = [[img_meta] for img_meta in img_metas] + segmentor.forward(img_list, img_meta_list, return_loss=False) + + # Test forward aug test + with torch.no_grad(): + segmentor.eval() + # pack into lists + img_list = [img[None, :] for img in imgs] + img_list = img_list + img_list + img_meta_list = [[img_meta] for img_meta in img_metas] + img_meta_list = img_meta_list + img_meta_list + segmentor.forward(img_list, img_meta_list, return_loss=False) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_utils/__init__.py 
b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_utils/__init__.py new file mode 100644 index 0000000..ef101fe --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_utils/__init__.py @@ -0,0 +1 @@ +# Copyright (c) OpenMMLab. All rights reserved. diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_utils/test_embed.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_utils/test_embed.py new file mode 100644 index 0000000..be20c97 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_utils/test_embed.py @@ -0,0 +1,461 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pytest +import torch + +from mmseg.models.utils.embed import AdaptivePadding, PatchEmbed, PatchMerging + + +def test_adaptive_padding(): + + for padding in ('same', 'corner'): + kernel_size = 16 + stride = 16 + dilation = 1 + input = torch.rand(1, 1, 15, 17) + adap_pool = AdaptivePadding( + kernel_size=kernel_size, + stride=stride, + dilation=dilation, + padding=padding) + out = adap_pool(input) + # padding to divisible by 16 + assert (out.shape[2], out.shape[3]) == (16, 32) + input = torch.rand(1, 1, 16, 17) + out = adap_pool(input) + # padding to divisible by 16 + assert (out.shape[2], out.shape[3]) == (16, 32) + + kernel_size = (2, 2) + stride = (2, 2) + dilation = (1, 1) + + adap_pad = AdaptivePadding( + kernel_size=kernel_size, + stride=stride, + dilation=dilation, + padding=padding) + input = torch.rand(1, 1, 11, 13) + out = adap_pad(input) + # padding to divisible by 2 + assert (out.shape[2], out.shape[3]) == (12, 14) + + kernel_size = (2, 2) + stride = (10, 10) + dilation = (1, 1) + + adap_pad = AdaptivePadding( + kernel_size=kernel_size, + stride=stride, + dilation=dilation, + padding=padding) + input = torch.rand(1, 1, 10, 13) + out = adap_pad(input) + # no padding + assert (out.shape[2], out.shape[3]) == (10, 13) + + kernel_size = (11, 11) + adap_pad = AdaptivePadding( + kernel_size=kernel_size, + stride=stride, + dilation=dilation, + padding=padding) + input = torch.rand(1, 1, 11, 13) + out = adap_pad(input) + # all padding + assert (out.shape[2], out.shape[3]) == (21, 21) + + # test padding as kernel is (7,9) + input = torch.rand(1, 1, 11, 13) + stride = (3, 4) + kernel_size = (4, 5) + dilation = (2, 2) + # actually (7, 9) + adap_pad = AdaptivePadding( + kernel_size=kernel_size, + stride=stride, + dilation=dilation, + padding=padding) + dilation_out = adap_pad(input) + assert (dilation_out.shape[2], dilation_out.shape[3]) == (16, 21) + kernel_size = (7, 9) + dilation = (1, 1) + adap_pad = AdaptivePadding( + kernel_size=kernel_size, + stride=stride, + dilation=dilation, + padding=padding) + kernel79_out = adap_pad(input) + assert (kernel79_out.shape[2], kernel79_out.shape[3]) == (16, 21) + assert kernel79_out.shape == dilation_out.shape + + # assert only support "same" "corner" + with pytest.raises(AssertionError): + AdaptivePadding( + kernel_size=kernel_size, + stride=stride, + dilation=dilation, + padding=1) + + +def test_patch_embed(): + B = 2 + H = 3 + W = 4 + C = 3 + embed_dims = 10 + kernel_size = 3 + stride = 1 + dummy_input = torch.rand(B, C, H, W) + patch_merge_1 = PatchEmbed( + in_channels=C, + embed_dims=embed_dims, + kernel_size=kernel_size, + stride=stride, + padding=0, + dilation=1, + norm_cfg=None) + + x1, shape = patch_merge_1(dummy_input) + # test out shape + assert x1.shape == (2, 2, 10) + # test outsize is correct + assert shape == (1, 2) + # 
test L = out_h * out_w + assert shape[0] * shape[1] == x1.shape[1] + + B = 2 + H = 10 + W = 10 + C = 3 + embed_dims = 10 + kernel_size = 5 + stride = 2 + dummy_input = torch.rand(B, C, H, W) + # test dilation + patch_merge_2 = PatchEmbed( + in_channels=C, + embed_dims=embed_dims, + kernel_size=kernel_size, + stride=stride, + padding=0, + dilation=2, + norm_cfg=None, + ) + + x2, shape = patch_merge_2(dummy_input) + # test out shape + assert x2.shape == (2, 1, 10) + # test outsize is correct + assert shape == (1, 1) + # test L = out_h * out_w + assert shape[0] * shape[1] == x2.shape[1] + + stride = 2 + input_size = (10, 10) + + dummy_input = torch.rand(B, C, H, W) + # test stride and norm + patch_merge_3 = PatchEmbed( + in_channels=C, + embed_dims=embed_dims, + kernel_size=kernel_size, + stride=stride, + padding=0, + dilation=2, + norm_cfg=dict(type='LN'), + input_size=input_size) + + x3, shape = patch_merge_3(dummy_input) + # test out shape + assert x3.shape == (2, 1, 10) + # test outsize is correct + assert shape == (1, 1) + # test L = out_h * out_w + assert shape[0] * shape[1] == x3.shape[1] + + # test the init_out_size with nn.Unfold + assert patch_merge_3.init_out_size[1] == (input_size[0] - 2 * 4 - + 1) // 2 + 1 + assert patch_merge_3.init_out_size[0] == (input_size[0] - 2 * 4 - + 1) // 2 + 1 + H = 11 + W = 12 + input_size = (H, W) + dummy_input = torch.rand(B, C, H, W) + # test stride and norm + patch_merge_3 = PatchEmbed( + in_channels=C, + embed_dims=embed_dims, + kernel_size=kernel_size, + stride=stride, + padding=0, + dilation=2, + norm_cfg=dict(type='LN'), + input_size=input_size) + + _, shape = patch_merge_3(dummy_input) + # when input_size equal to real input + # the out_size should be equal to `init_out_size` + assert shape == patch_merge_3.init_out_size + + input_size = (H, W) + dummy_input = torch.rand(B, C, H, W) + # test stride and norm + patch_merge_3 = PatchEmbed( + in_channels=C, + embed_dims=embed_dims, + kernel_size=kernel_size, + stride=stride, + padding=0, + dilation=2, + norm_cfg=dict(type='LN'), + input_size=input_size) + + _, shape = patch_merge_3(dummy_input) + # when input_size equal to real input + # the out_size should be equal to `init_out_size` + assert shape == patch_merge_3.init_out_size + + # test adap padding + for padding in ('same', 'corner'): + in_c = 2 + embed_dims = 3 + B = 2 + + # test stride is 1 + input_size = (5, 5) + kernel_size = (5, 5) + stride = (1, 1) + dilation = 1 + bias = False + + x = torch.rand(B, in_c, *input_size) + patch_embed = PatchEmbed( + in_channels=in_c, + embed_dims=embed_dims, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + bias=bias) + + x_out, out_size = patch_embed(x) + assert x_out.size() == (B, 25, 3) + assert out_size == (5, 5) + assert x_out.size(1) == out_size[0] * out_size[1] + + # test kernel_size == stride + input_size = (5, 5) + kernel_size = (5, 5) + stride = (5, 5) + dilation = 1 + bias = False + + x = torch.rand(B, in_c, *input_size) + patch_embed = PatchEmbed( + in_channels=in_c, + embed_dims=embed_dims, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + bias=bias) + + x_out, out_size = patch_embed(x) + assert x_out.size() == (B, 1, 3) + assert out_size == (1, 1) + assert x_out.size(1) == out_size[0] * out_size[1] + + # test kernel_size == stride + input_size = (6, 5) + kernel_size = (5, 5) + stride = (5, 5) + dilation = 1 + bias = False + + x = torch.rand(B, in_c, *input_size) + patch_embed = PatchEmbed( + in_channels=in_c, + 
embed_dims=embed_dims, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + bias=bias) + + x_out, out_size = patch_embed(x) + assert x_out.size() == (B, 2, 3) + assert out_size == (2, 1) + assert x_out.size(1) == out_size[0] * out_size[1] + + # test different kernel_size with different stride + input_size = (6, 5) + kernel_size = (6, 2) + stride = (6, 2) + dilation = 1 + bias = False + + x = torch.rand(B, in_c, *input_size) + patch_embed = PatchEmbed( + in_channels=in_c, + embed_dims=embed_dims, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + bias=bias) + + x_out, out_size = patch_embed(x) + assert x_out.size() == (B, 3, 3) + assert out_size == (1, 3) + assert x_out.size(1) == out_size[0] * out_size[1] + + +def test_patch_merging(): + + # Test the model with int padding + in_c = 3 + out_c = 4 + kernel_size = 3 + stride = 3 + padding = 1 + dilation = 1 + bias = False + # test the case `pad_to_stride` is False + patch_merge = PatchMerging( + in_channels=in_c, + out_channels=out_c, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + bias=bias) + B, L, C = 1, 100, 3 + input_size = (10, 10) + x = torch.rand(B, L, C) + x_out, out_size = patch_merge(x, input_size) + assert x_out.size() == (1, 16, 4) + assert out_size == (4, 4) + # assert out size is consistent with real output + assert x_out.size(1) == out_size[0] * out_size[1] + in_c = 4 + out_c = 5 + kernel_size = 6 + stride = 3 + padding = 2 + dilation = 2 + bias = False + patch_merge = PatchMerging( + in_channels=in_c, + out_channels=out_c, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + bias=bias) + B, L, C = 1, 100, 4 + input_size = (10, 10) + x = torch.rand(B, L, C) + x_out, out_size = patch_merge(x, input_size) + assert x_out.size() == (1, 4, 5) + assert out_size == (2, 2) + # assert out size is consistent with real output + assert x_out.size(1) == out_size[0] * out_size[1] + + # Test with adaptive padding + for padding in ('same', 'corner'): + in_c = 2 + out_c = 3 + B = 2 + + # test stride is 1 + input_size = (5, 5) + kernel_size = (5, 5) + stride = (1, 1) + dilation = 1 + bias = False + L = input_size[0] * input_size[1] + + x = torch.rand(B, L, in_c) + patch_merge = PatchMerging( + in_channels=in_c, + out_channels=out_c, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + bias=bias) + + x_out, out_size = patch_merge(x, input_size) + assert x_out.size() == (B, 25, 3) + assert out_size == (5, 5) + assert x_out.size(1) == out_size[0] * out_size[1] + + # test kernel_size == stride + input_size = (5, 5) + kernel_size = (5, 5) + stride = (5, 5) + dilation = 1 + bias = False + L = input_size[0] * input_size[1] + + x = torch.rand(B, L, in_c) + patch_merge = PatchMerging( + in_channels=in_c, + out_channels=out_c, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + bias=bias) + + x_out, out_size = patch_merge(x, input_size) + assert x_out.size() == (B, 1, 3) + assert out_size == (1, 1) + assert x_out.size(1) == out_size[0] * out_size[1] + + # test kernel_size == stride + input_size = (6, 5) + kernel_size = (5, 5) + stride = (5, 5) + dilation = 1 + bias = False + L = input_size[0] * input_size[1] + + x = torch.rand(B, L, in_c) + patch_merge = PatchMerging( + in_channels=in_c, + out_channels=out_c, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + bias=bias) + + x_out, out_size = patch_merge(x, input_size) 
+ assert x_out.size() == (B, 2, 3) + assert out_size == (2, 1) + assert x_out.size(1) == out_size[0] * out_size[1] + + # test different kernel_size with different stride + input_size = (6, 5) + kernel_size = (6, 2) + stride = (6, 2) + dilation = 1 + bias = False + L = input_size[0] * input_size[1] + + x = torch.rand(B, L, in_c) + patch_merge = PatchMerging( + in_channels=in_c, + out_channels=out_c, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + bias=bias) + + x_out, out_size = patch_merge(x, input_size) + assert x_out.size() == (B, 3, 3) + assert out_size == (1, 3) + assert x_out.size(1) == out_size[0] * out_size[1] diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_utils/test_shape_convert.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_utils/test_shape_convert.py new file mode 100644 index 0000000..60e87f3 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_models/test_utils/test_shape_convert.py @@ -0,0 +1,89 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import torch + +from mmseg.models.utils import (nchw2nlc2nchw, nchw_to_nlc, nlc2nchw2nlc, + nlc_to_nchw) + + +def test_nchw2nlc2nchw(): + # Test nchw2nlc2nchw function + shape_nchw = (4, 2, 5, 5) + shape_nlc = (4, 25, 2) + + def test_func(x): + assert x.shape == torch.Size(shape_nlc) + return x + + x = torch.rand(*shape_nchw) + output = nchw2nlc2nchw(test_func, x) + assert output.shape == torch.Size(shape_nchw) + + def test_func2(x, arg): + assert x.shape == torch.Size(shape_nlc) + assert arg == 100 + return x + + x = torch.rand(*shape_nchw) + output = nchw2nlc2nchw(test_func2, x, arg=100) + assert output.shape == torch.Size(shape_nchw) + + def test_func3(x): + assert x.is_contiguous() + assert x.shape == torch.Size(shape_nlc) + return x + + x = torch.rand(*shape_nchw) + output = nchw2nlc2nchw(test_func3, x, contiguous=True) + assert output.shape == torch.Size(shape_nchw) + assert output.is_contiguous() + + +def test_nlc2nchw2nlc(): + # Test nlc2nchw2nlc function + shape_nchw = (4, 2, 5, 5) + shape_nlc = (4, 25, 2) + + def test_func(x): + assert x.shape == torch.Size(shape_nchw) + return x + + x = torch.rand(*shape_nlc) + output = nlc2nchw2nlc(test_func, x, shape_nchw[2:]) + assert output.shape == torch.Size(shape_nlc) + + def test_func2(x, arg): + assert x.shape == torch.Size(shape_nchw) + assert arg == 100 + return x + + x = torch.rand(*shape_nlc) + output = nlc2nchw2nlc(test_func2, x, shape_nchw[2:], arg=100) + assert output.shape == torch.Size(shape_nlc) + + def test_func3(x): + assert x.is_contiguous() + assert x.shape == torch.Size(shape_nchw) + return x + + x = torch.rand(*shape_nlc) + output = nlc2nchw2nlc(test_func3, x, shape_nchw[2:], contiguous=True) + assert output.shape == torch.Size(shape_nlc) + assert output.is_contiguous() + + +def test_nchw_to_nlc(): + # Test nchw_to_nlc function + shape_nchw = (4, 2, 5, 5) + shape_nlc = (4, 25, 2) + x = torch.rand(*shape_nchw) + y = nchw_to_nlc(x) + assert y.shape == torch.Size(shape_nlc) + + +def test_nlc_to_nchw(): + # Test nlc_to_nchw function + shape_nchw = (4, 2, 5, 5) + shape_nlc = (4, 25, 2) + x = torch.rand(*shape_nlc) + y = nlc_to_nchw(x, (5, 5)) + assert y.shape == torch.Size(shape_nchw) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_sampler.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_sampler.py new file mode 100644 index 0000000..1409224 --- /dev/null +++ 
b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_sampler.py @@ -0,0 +1,78 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import pytest +import torch + +from mmseg.core import OHEMPixelSampler +from mmseg.models.decode_heads import FCNHead + + +def _context_for_ohem(): + return FCNHead(in_channels=32, channels=16, num_classes=19) + + +def _context_for_ohem_multiple_loss(): + return FCNHead( + in_channels=32, + channels=16, + num_classes=19, + loss_decode=[ + dict(type='CrossEntropyLoss', loss_name='loss_1'), + dict(type='CrossEntropyLoss', loss_name='loss_2') + ]) + + +def test_ohem_sampler(): + + with pytest.raises(AssertionError): + # seg_logit and seg_label must be of the same size + sampler = OHEMPixelSampler(context=_context_for_ohem()) + seg_logit = torch.randn(1, 19, 45, 45) + seg_label = torch.randint(0, 19, size=(1, 1, 89, 89)) + sampler.sample(seg_logit, seg_label) + + # test with thresh + sampler = OHEMPixelSampler( + context=_context_for_ohem(), thresh=0.7, min_kept=200) + seg_logit = torch.randn(1, 19, 45, 45) + seg_label = torch.randint(0, 19, size=(1, 1, 45, 45)) + seg_weight = sampler.sample(seg_logit, seg_label) + assert seg_weight.shape[0] == seg_logit.shape[0] + assert seg_weight.shape[1:] == seg_logit.shape[2:] + assert seg_weight.sum() > 200 + + # test w.o thresh + sampler = OHEMPixelSampler(context=_context_for_ohem(), min_kept=200) + seg_logit = torch.randn(1, 19, 45, 45) + seg_label = torch.randint(0, 19, size=(1, 1, 45, 45)) + seg_weight = sampler.sample(seg_logit, seg_label) + assert seg_weight.shape[0] == seg_logit.shape[0] + assert seg_weight.shape[1:] == seg_logit.shape[2:] + assert seg_weight.sum() == 200 + + # test multiple losses case + with pytest.raises(AssertionError): + # seg_logit and seg_label must be of the same size + sampler = OHEMPixelSampler(context=_context_for_ohem_multiple_loss()) + seg_logit = torch.randn(1, 19, 45, 45) + seg_label = torch.randint(0, 19, size=(1, 1, 89, 89)) + sampler.sample(seg_logit, seg_label) + + # test with thresh in multiple losses case + sampler = OHEMPixelSampler( + context=_context_for_ohem_multiple_loss(), thresh=0.7, min_kept=200) + seg_logit = torch.randn(1, 19, 45, 45) + seg_label = torch.randint(0, 19, size=(1, 1, 45, 45)) + seg_weight = sampler.sample(seg_logit, seg_label) + assert seg_weight.shape[0] == seg_logit.shape[0] + assert seg_weight.shape[1:] == seg_logit.shape[2:] + assert seg_weight.sum() > 200 + + # test w.o thresh in multiple losses case + sampler = OHEMPixelSampler( + context=_context_for_ohem_multiple_loss(), min_kept=200) + seg_logit = torch.randn(1, 19, 45, 45) + seg_label = torch.randint(0, 19, size=(1, 1, 45, 45)) + seg_weight = sampler.sample(seg_logit, seg_label) + assert seg_weight.shape[0] == seg_logit.shape[0] + assert seg_weight.shape[1:] == seg_logit.shape[2:] + assert seg_weight.sum() == 200 diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_utils/test_misc.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_utils/test_misc.py new file mode 100644 index 0000000..7ce1fa6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_utils/test_misc.py @@ -0,0 +1,40 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
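+# Usage sketch for `find_latest_checkpoint`, matching the behaviour the
+# tests below assert: None for a missing or empty directory, an explicit
+# `latest.pth` if present, otherwise the highest-numbered `iter_*.pth`
+# or `epoch_*.pth`. The work dir path here is hypothetical:
+#
+#   from mmseg.utils import find_latest_checkpoint
+#   ckpt = find_latest_checkpoint('work_dirs/my_exp')
+#   if ckpt is not None:
+#       print(f'resuming from {ckpt}')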
+import os.path as osp +import tempfile + +from mmseg.utils import find_latest_checkpoint + + +def test_find_latest_checkpoint(): + with tempfile.TemporaryDirectory() as tempdir: + # no checkpoints in the path + path = tempdir + latest = find_latest_checkpoint(path) + assert latest is None + + # The path doesn't exist + path = osp.join(tempdir, 'none') + latest = find_latest_checkpoint(path) + assert latest is None + + # test when latest.pth exists + with tempfile.TemporaryDirectory() as tempdir: + with open(osp.join(tempdir, 'latest.pth'), 'w') as f: + f.write('latest') + path = tempdir + latest = find_latest_checkpoint(path) + assert latest == osp.join(tempdir, 'latest.pth') + + with tempfile.TemporaryDirectory() as tempdir: + for iter in range(1600, 160001, 1600): + with open(osp.join(tempdir, f'iter_{iter}.pth'), 'w') as f: + f.write(f'iter_{iter}.pth') + latest = find_latest_checkpoint(tempdir) + assert latest == osp.join(tempdir, 'iter_160000.pth') + + with tempfile.TemporaryDirectory() as tempdir: + for epoch in range(1, 21): + with open(osp.join(tempdir, f'epoch_{epoch}.pth'), 'w') as f: + f.write(f'epoch_{epoch}.pth') + latest = find_latest_checkpoint(tempdir) + assert latest == osp.join(tempdir, 'epoch_20.pth') diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_utils/test_set_env.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_utils/test_set_env.py new file mode 100644 index 0000000..0af4424 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_utils/test_set_env.py @@ -0,0 +1,85 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import multiprocessing as mp +import os +import platform + +import cv2 +import pytest +from mmcv import Config + +from mmseg.utils import setup_multi_processes + + +@pytest.mark.parametrize('workers_per_gpu', (0, 2)) +@pytest.mark.parametrize(('valid', 'env_cfg'), [(True, + dict( + mp_start_method='fork', + opencv_num_threads=0, + omp_num_threads=1, + mkl_num_threads=1)), + (False, + dict( + mp_start_method=1, + opencv_num_threads=0.1, + omp_num_threads='s', + mkl_num_threads='1'))]) +def test_setup_multi_processes(workers_per_gpu, valid, env_cfg): + # temp save system setting + sys_start_mehod = mp.get_start_method(allow_none=True) + sys_cv_threads = cv2.getNumThreads() + # pop and temp save system env vars + sys_omp_threads = os.environ.pop('OMP_NUM_THREADS', default=None) + sys_mkl_threads = os.environ.pop('MKL_NUM_THREADS', default=None) + + config = dict(data=dict(workers_per_gpu=workers_per_gpu)) + config.update(env_cfg) + cfg = Config(config) + setup_multi_processes(cfg) + + # test when cfg is valid and workers_per_gpu > 0 + # setup_multi_processes will work + if valid and workers_per_gpu > 0: + # test config without setting env + + assert os.getenv('OMP_NUM_THREADS') == str(env_cfg['omp_num_threads']) + assert os.getenv('MKL_NUM_THREADS') == str(env_cfg['mkl_num_threads']) + # when set to 0, the num threads will be 1 + assert cv2.getNumThreads() == env_cfg[ + 'opencv_num_threads'] if env_cfg['opencv_num_threads'] > 0 else 1 + if platform.system() != 'Windows': + assert mp.get_start_method() == env_cfg['mp_start_method'] + + # revert setting to avoid affecting other programs + if sys_start_mehod: + mp.set_start_method(sys_start_mehod, force=True) + cv2.setNumThreads(sys_cv_threads) + if sys_omp_threads: + os.environ['OMP_NUM_THREADS'] = sys_omp_threads + else: + os.environ.pop('OMP_NUM_THREADS') + if sys_mkl_threads: + os.environ['MKL_NUM_THREADS'] = sys_mkl_threads + else: + 
os.environ.pop('MKL_NUM_THREADS') + + elif valid and workers_per_gpu == 0: + + if platform.system() != 'Windows': + assert mp.get_start_method() == env_cfg['mp_start_method'] + assert cv2.getNumThreads() == env_cfg[ + 'opencv_num_threads'] if env_cfg['opencv_num_threads'] > 0 else 1 + assert 'OMP_NUM_THREADS' not in os.environ + assert 'MKL_NUM_THREADS' not in os.environ + if sys_start_mehod: + mp.set_start_method(sys_start_mehod, force=True) + cv2.setNumThreads(sys_cv_threads) + if sys_omp_threads: + os.environ['OMP_NUM_THREADS'] = sys_omp_threads + if sys_mkl_threads: + os.environ['MKL_NUM_THREADS'] = sys_mkl_threads + + else: + assert mp.get_start_method() == sys_start_mehod + assert cv2.getNumThreads() == sys_cv_threads + assert 'OMP_NUM_THREADS' not in os.environ + assert 'MKL_NUM_THREADS' not in os.environ diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_utils/test_util_distribution.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_utils/test_util_distribution.py new file mode 100644 index 0000000..103d1d6 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tests/test_utils/test_util_distribution.py @@ -0,0 +1,68 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from unittest.mock import MagicMock, patch + +import mmcv +import torch +import torch.nn as nn +from mmcv.parallel import (MMDataParallel, MMDistributedDataParallel, + is_module_wrapper) + +from mmseg import digit_version +from mmseg.utils import build_ddp, build_dp + + +def mock(*args, **kwargs): + pass + + +class Model(nn.Module): + + def __init__(self): + super().__init__() + self.conv = nn.Conv2d(2, 2, 1) + + def forward(self, x): + return self.conv(x) + + +@patch('torch.distributed._broadcast_coalesced', mock) +@patch('torch.distributed.broadcast', mock) +@patch('torch.nn.parallel.DistributedDataParallel._ddp_init_helper', mock) +def test_build_dp(): + model = Model() + assert not is_module_wrapper(model) + + mmdp = build_dp(model, 'cpu') + assert isinstance(mmdp, MMDataParallel) + + if torch.cuda.is_available(): + mmdp = build_dp(model, 'cuda') + assert isinstance(mmdp, MMDataParallel) + + if digit_version(mmcv.__version__) >= digit_version('1.5.0'): + from mmcv.device.mlu import MLUDataParallel + from mmcv.utils import IS_MLU_AVAILABLE + if IS_MLU_AVAILABLE: + mludp = build_dp(model, 'mlu') + assert isinstance(mludp, MLUDataParallel) + + +@patch('torch.distributed._broadcast_coalesced', mock) +@patch('torch.distributed.broadcast', mock) +@patch('torch.nn.parallel.DistributedDataParallel._ddp_init_helper', mock) +def test_build_ddp(): + model = Model() + assert not is_module_wrapper(model) + + if torch.cuda.is_available(): + mmddp = build_ddp( + model, 'cuda', device_id=[0], process_group=MagicMock()) + assert isinstance(mmddp, MMDistributedDataParallel) + + if digit_version(mmcv.__version__) >= digit_version('1.5.0'): + from mmcv.device.mlu import MLUDistributedDataParallel + from mmcv.utils import IS_MLU_AVAILABLE + if IS_MLU_AVAILABLE: + mluddp = build_ddp( + model, 'mlu', device_ids=[0], process_group=MagicMock()) + assert isinstance(mluddp, MLUDistributedDataParallel) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/analyze_logs.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/analyze_logs.py new file mode 100644 index 0000000..e2127d4 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/analyze_logs.py @@ -0,0 +1,128 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
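+# Example invocation (a sketch; flags as defined in parse_args() below,
+# the log path is hypothetical). Note that when --legend is given, its
+# length must equal len(json_logs) * len(--keys):
+#
+#   python tools/analyze_logs.py work_dirs/exp/20220101_000000.log.json \
+#       --keys mIoU loss --legend exp-mIoU exp-loss --out curves.png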
+"""Modified from https://github.com/open- +mmlab/mmdetection/blob/master/tools/analysis_tools/analyze_logs.py.""" +import argparse +import json +from collections import defaultdict + +import matplotlib.pyplot as plt +import seaborn as sns + + +def plot_curve(log_dicts, args): + if args.backend is not None: + plt.switch_backend(args.backend) + sns.set_style(args.style) + # if legend is None, use {filename}_{key} as legend + legend = args.legend + if legend is None: + legend = [] + for json_log in args.json_logs: + for metric in args.keys: + legend.append(f'{json_log}_{metric}') + assert len(legend) == (len(args.json_logs) * len(args.keys)) + metrics = args.keys + + num_metrics = len(metrics) + for i, log_dict in enumerate(log_dicts): + epochs = list(log_dict.keys()) + for j, metric in enumerate(metrics): + print(f'plot curve of {args.json_logs[i]}, metric is {metric}') + plot_epochs = [] + plot_iters = [] + plot_values = [] + # In some log files exist lines of validation, + # `mode` list is used to only collect iter number + # of training line. + for epoch in epochs: + epoch_logs = log_dict[epoch] + if metric not in epoch_logs.keys(): + continue + if metric in ['mIoU', 'mAcc', 'aAcc']: + plot_epochs.append(epoch) + plot_values.append(epoch_logs[metric][0]) + else: + for idx in range(len(epoch_logs[metric])): + if epoch_logs['mode'][idx] == 'train': + plot_iters.append(epoch_logs['iter'][idx]) + plot_values.append(epoch_logs[metric][idx]) + ax = plt.gca() + label = legend[i * num_metrics + j] + if metric in ['mIoU', 'mAcc', 'aAcc']: + ax.set_xticks(plot_epochs) + plt.xlabel('epoch') + plt.plot(plot_epochs, plot_values, label=label, marker='o') + else: + plt.xlabel('iter') + plt.plot(plot_iters, plot_values, label=label, linewidth=0.5) + plt.legend() + if args.title is not None: + plt.title(args.title) + if args.out is None: + plt.show() + else: + print(f'save curve to: {args.out}') + plt.savefig(args.out) + plt.cla() + + +def parse_args(): + parser = argparse.ArgumentParser(description='Analyze Json Log') + parser.add_argument( + 'json_logs', + type=str, + nargs='+', + help='path of train log in json format') + parser.add_argument( + '--keys', + type=str, + nargs='+', + default=['mIoU'], + help='the metric that you want to plot') + parser.add_argument('--title', type=str, help='title of figure') + parser.add_argument( + '--legend', + type=str, + nargs='+', + default=None, + help='legend of each plot') + parser.add_argument( + '--backend', type=str, default=None, help='backend of plt') + parser.add_argument( + '--style', type=str, default='dark', help='style of plt') + parser.add_argument('--out', type=str, default=None) + args = parser.parse_args() + return args + + +def load_json_logs(json_logs): + # load and convert json_logs to log_dict, key is epoch, value is a sub dict + # keys of sub dict is different metrics + # value of sub dict is a list of corresponding values of all iterations + log_dicts = [dict() for _ in json_logs] + for json_log, log_dict in zip(json_logs, log_dicts): + with open(json_log, 'r') as log_file: + for line in log_file: + log = json.loads(line.strip()) + # skip lines without `epoch` field + if 'epoch' not in log: + continue + epoch = log.pop('epoch') + if epoch not in log_dict: + log_dict[epoch] = defaultdict(list) + for k, v in log.items(): + log_dict[epoch][k].append(v) + return log_dicts + + +def main(): + args = parse_args() + json_logs = args.json_logs + for json_log in json_logs: + assert json_log.endswith('.json') + log_dicts = load_json_logs(json_logs) + 
plot_curve(log_dicts, args) + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/benchmark.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/benchmark.py new file mode 100644 index 0000000..f6d6888 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/benchmark.py @@ -0,0 +1,120 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os.path as osp +import time + +import mmcv +import numpy as np +import torch +from mmcv import Config +from mmcv.parallel import MMDataParallel +from mmcv.runner import load_checkpoint, wrap_fp16_model + +from mmseg.datasets import build_dataloader, build_dataset +from mmseg.models import build_segmentor + + +def parse_args(): + parser = argparse.ArgumentParser(description='MMSeg benchmark a model') + parser.add_argument('config', help='test config file path') + parser.add_argument('checkpoint', help='checkpoint file') + parser.add_argument( + '--log-interval', type=int, default=50, help='interval of logging') + parser.add_argument( + '--work-dir', + help=('if specified, the results will be dumped ' + 'into the directory as json')) + parser.add_argument('--repeat-times', type=int, default=1) + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + + cfg = Config.fromfile(args.config) + timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) + if args.work_dir is not None: + mmcv.mkdir_or_exist(osp.abspath(args.work_dir)) + json_file = osp.join(args.work_dir, f'fps_{timestamp}.json') + else: + # use config filename as default work_dir if cfg.work_dir is None + work_dir = osp.join('./work_dirs', + osp.splitext(osp.basename(args.config))[0]) + mmcv.mkdir_or_exist(osp.abspath(work_dir)) + json_file = osp.join(work_dir, f'fps_{timestamp}.json') + + repeat_times = args.repeat_times + # set cudnn_benchmark + torch.backends.cudnn.benchmark = False + cfg.model.pretrained = None + cfg.data.test.test_mode = True + + benchmark_dict = dict(config=args.config, unit='img / s') + overall_fps_list = [] + for time_index in range(repeat_times): + print(f'Run {time_index + 1}:') + # build the dataloader + # TODO: support multiple images per gpu (only minor changes are needed) + dataset = build_dataset(cfg.data.test) + data_loader = build_dataloader( + dataset, + samples_per_gpu=1, + workers_per_gpu=cfg.data.workers_per_gpu, + dist=False, + shuffle=False) + + # build the model and load checkpoint + cfg.model.train_cfg = None + model = build_segmentor(cfg.model, test_cfg=cfg.get('test_cfg')) + fp16_cfg = cfg.get('fp16', None) + if fp16_cfg is not None: + wrap_fp16_model(model) + if 'checkpoint' in args and osp.exists(args.checkpoint): + load_checkpoint(model, args.checkpoint, map_location='cpu') + + model = MMDataParallel(model, device_ids=[0]) + + model.eval() + + # the first several iterations may be very slow so skip them + num_warmup = 5 + pure_inf_time = 0 + total_iters = 200 + + # benchmark with 200 image and take the average + for i, data in enumerate(data_loader): + + torch.cuda.synchronize() + start_time = time.perf_counter() + + with torch.no_grad(): + model(return_loss=False, rescale=True, **data) + + torch.cuda.synchronize() + elapsed = time.perf_counter() - start_time + + if i >= num_warmup: + pure_inf_time += elapsed + if (i + 1) % args.log_interval == 0: + fps = (i + 1 - num_warmup) / pure_inf_time + print(f'Done image [{i + 1:<3}/ {total_iters}], ' + f'fps: {fps:.2f} img / s') + + if (i + 1) == total_iters: + fps = (i + 1 - 
num_warmup) / pure_inf_time + print(f'Overall fps: {fps:.2f} img / s\n') + benchmark_dict[f'overall_fps_{time_index + 1}'] = round(fps, 2) + overall_fps_list.append(fps) + break + benchmark_dict['average_fps'] = round(np.mean(overall_fps_list), 2) + benchmark_dict['fps_variance'] = round(np.var(overall_fps_list), 4) + print(f'Average fps of {repeat_times} evaluations: ' + f'{benchmark_dict["average_fps"]}') + print(f'The variance of {repeat_times} evaluations: ' + f'{benchmark_dict["fps_variance"]}') + mmcv.dump(benchmark_dict, json_file, indent=4) + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/browse_dataset.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/browse_dataset.py new file mode 100644 index 0000000..0aa9430 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/browse_dataset.py @@ -0,0 +1,182 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os +import warnings +from pathlib import Path + +import mmcv +import numpy as np +from mmcv import Config, DictAction + +from mmseg.datasets.builder import build_dataset + + +def parse_args(): + parser = argparse.ArgumentParser(description='Browse a dataset') + parser.add_argument('config', help='train config file path') + parser.add_argument( + '--show-origin', + default=False, + action='store_true', + help='if True, omit all augmentation in pipeline,' + ' show origin image and seg map') + parser.add_argument( + '--skip-type', + type=str, + nargs='+', + default=['DefaultFormatBundle', 'Normalize', 'Collect'], + help='skip some useless pipeline,if `show-origin` is true, ' + 'all pipeline except `Load` will be skipped') + parser.add_argument( + '--output-dir', + default='./output', + type=str, + help='If there is no display interface, you can save it') + parser.add_argument('--show', default=False, action='store_true') + parser.add_argument( + '--show-interval', + type=int, + default=999, + help='the interval of show (ms)') + parser.add_argument( + '--opacity', + type=float, + default=0.5, + help='the opacity of semantic map') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + args = parser.parse_args() + return args + + +def imshow_semantic(img, + seg, + class_names, + palette=None, + win_name='', + show=False, + wait_time=0, + out_file=None, + opacity=0.5): + """Draw `result` over `img`. + + Args: + img (str or Tensor): The image to be displayed. + seg (Tensor): The semantic segmentation results to draw over + `img`. + class_names (list[str]): Names of each classes. + palette (list[list[int]]] | np.ndarray | None): The palette of + segmentation map. If None is given, random palette will be + generated. Default: None + win_name (str): The window name. + wait_time (int): Value of waitKey param. + Default: 0. + show (bool): Whether to show the image. + Default: False. + out_file (str or None): The filename to write the image. + Default: None. + opacity(float): Opacity of painted segmentation map. + Default 0.5. + Must be in (0, 1] range. 
+ Returns: + img (Tensor): Only if not `show` or `out_file` + """ + img = mmcv.imread(img) + img = img.copy() + if palette is None: + palette = np.random.randint(0, 255, size=(len(class_names), 3)) + palette = np.array(palette) + assert palette.shape[0] == len(class_names) + assert palette.shape[1] == 3 + assert len(palette.shape) == 2 + assert 0 < opacity <= 1.0 + color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8) + for label, color in enumerate(palette): + color_seg[seg == label, :] = color + # convert to BGR + color_seg = color_seg[..., ::-1] + + img = img * (1 - opacity) + color_seg * opacity + img = img.astype(np.uint8) + # if out_file specified, do not show image in window + if out_file is not None: + show = False + + if show: + mmcv.imshow(img, win_name, wait_time) + if out_file is not None: + mmcv.imwrite(img, out_file) + + if not (show or out_file): + warnings.warn('show==False and out_file is not specified, only ' + 'result image will be returned') + return img + + +def _retrieve_data_cfg(_data_cfg, skip_type, show_origin): + if show_origin is True: + # only keep pipeline of Loading data and ann + _data_cfg['pipeline'] = [ + x for x in _data_cfg.pipeline if 'Load' in x['type'] + ] + else: + _data_cfg['pipeline'] = [ + x for x in _data_cfg.pipeline if x['type'] not in skip_type + ] + + +def retrieve_data_cfg(config_path, skip_type, cfg_options, show_origin=False): + cfg = Config.fromfile(config_path) + if cfg_options is not None: + cfg.merge_from_dict(cfg_options) + train_data_cfg = cfg.data.train + if isinstance(train_data_cfg, list): + for _data_cfg in train_data_cfg: + while 'dataset' in _data_cfg and _data_cfg[ + 'type'] != 'MultiImageMixDataset': + _data_cfg = _data_cfg['dataset'] + if 'pipeline' in _data_cfg: + _retrieve_data_cfg(_data_cfg, skip_type, show_origin) + else: + raise ValueError + else: + while 'dataset' in train_data_cfg and train_data_cfg[ + 'type'] != 'MultiImageMixDataset': + train_data_cfg = train_data_cfg['dataset'] + _retrieve_data_cfg(train_data_cfg, skip_type, show_origin) + return cfg + + +def main(): + args = parse_args() + cfg = retrieve_data_cfg(args.config, args.skip_type, args.cfg_options, + args.show_origin) + dataset = build_dataset(cfg.data.train) + progress_bar = mmcv.ProgressBar(len(dataset)) + for item in dataset: + filename = os.path.join(args.output_dir, + Path(item['filename']).name + ) if args.output_dir is not None else None + imshow_semantic( + item['img'], + item['gt_semantic_seg'], + dataset.CLASSES, + dataset.PALETTE, + show=args.show, + wait_time=args.show_interval, + out_file=filename, + opacity=args.opacity, + ) + progress_bar.update() + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/confusion_matrix.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/confusion_matrix.py new file mode 100644 index 0000000..2c5b64c --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/confusion_matrix.py @@ -0,0 +1,184 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
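+# Example invocation (a sketch; paths are hypothetical). The .pkl file is
+# a list of per-image segmentation results, e.g. as dumped by
+# `tools/test.py ... --out result.pkl`:
+#
+#   python tools/confusion_matrix.py configs/my_cfg.py result.pkl \
+#       work_dirs/confusion --show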
+import argparse +import os + +import matplotlib.pyplot as plt +import mmcv +import numpy as np +from matplotlib.ticker import MultipleLocator +from mmcv import Config, DictAction + +from mmseg.datasets import build_dataset + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Generate confusion matrix from segmentation results') + parser.add_argument('config', help='test config file path') + parser.add_argument( + 'prediction_path', help='prediction path where test .pkl result') + parser.add_argument( + 'save_dir', help='directory where confusion matrix will be saved') + parser.add_argument( + '--show', action='store_true', help='show confusion matrix') + parser.add_argument( + '--color-theme', + default='winter', + help='theme of the matrix color map') + parser.add_argument( + '--title', + default='Normalized Confusion Matrix', + help='title of the matrix color map') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + args = parser.parse_args() + return args + + +def calculate_confusion_matrix(dataset, results): + """Calculate the confusion matrix. + + Args: + dataset (Dataset): Test or val dataset. + results (list[ndarray]): A list of segmentation results in each image. + """ + n = len(dataset.CLASSES) + confusion_matrix = np.zeros(shape=[n, n]) + assert len(dataset) == len(results) + prog_bar = mmcv.ProgressBar(len(results)) + for idx, per_img_res in enumerate(results): + res_segm = per_img_res + gt_segm = dataset.get_gt_seg_map_by_idx(idx) + inds = n * gt_segm + res_segm + inds = inds.flatten() + mat = np.bincount(inds, minlength=n**2).reshape(n, n) + confusion_matrix += mat + prog_bar.update() + return confusion_matrix + + +def plot_confusion_matrix(confusion_matrix, + labels, + save_dir=None, + show=True, + title='Normalized Confusion Matrix', + color_theme='winter'): + """Draw confusion matrix with matplotlib. + + Args: + confusion_matrix (ndarray): The confusion matrix. + labels (list[str]): List of class names. + save_dir (str|optional): If set, save the confusion matrix plot to the + given path. Default: None. + show (bool): Whether to show the plot. Default: True. + title (str): Title of the plot. Default: `Normalized Confusion Matrix`. + color_theme (str): Theme of the matrix color map. Default: `winter`. 
+ """ + # normalize the confusion matrix + per_label_sums = confusion_matrix.sum(axis=1)[:, np.newaxis] + confusion_matrix = \ + confusion_matrix.astype(np.float32) / per_label_sums * 100 + + num_classes = len(labels) + fig, ax = plt.subplots( + figsize=(2 * num_classes, 2 * num_classes * 0.8), dpi=180) + cmap = plt.get_cmap(color_theme) + im = ax.imshow(confusion_matrix, cmap=cmap) + plt.colorbar(mappable=im, ax=ax) + + title_font = {'weight': 'bold', 'size': 12} + ax.set_title(title, fontdict=title_font) + label_font = {'size': 10} + plt.ylabel('Ground Truth Label', fontdict=label_font) + plt.xlabel('Prediction Label', fontdict=label_font) + + # draw locator + xmajor_locator = MultipleLocator(1) + xminor_locator = MultipleLocator(0.5) + ax.xaxis.set_major_locator(xmajor_locator) + ax.xaxis.set_minor_locator(xminor_locator) + ymajor_locator = MultipleLocator(1) + yminor_locator = MultipleLocator(0.5) + ax.yaxis.set_major_locator(ymajor_locator) + ax.yaxis.set_minor_locator(yminor_locator) + + # draw grid + ax.grid(True, which='minor', linestyle='-') + + # draw label + ax.set_xticks(np.arange(num_classes)) + ax.set_yticks(np.arange(num_classes)) + ax.set_xticklabels(labels) + ax.set_yticklabels(labels) + + ax.tick_params( + axis='x', bottom=False, top=True, labelbottom=False, labeltop=True) + plt.setp( + ax.get_xticklabels(), rotation=45, ha='left', rotation_mode='anchor') + + # draw confusion matrix value + for i in range(num_classes): + for j in range(num_classes): + ax.text( + j, + i, + '{}%'.format( + round(confusion_matrix[i, j], 2 + ) if not np.isnan(confusion_matrix[i, j]) else -1), + ha='center', + va='center', + color='w', + size=7) + + ax.set_ylim(len(confusion_matrix) - 0.5, -0.5) # matplotlib>3.1.1 + + fig.tight_layout() + if save_dir is not None: + plt.savefig( + os.path.join(save_dir, 'confusion_matrix.png'), format='png') + if show: + plt.show() + + +def main(): + args = parse_args() + + cfg = Config.fromfile(args.config) + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + + results = mmcv.load(args.prediction_path) + + assert isinstance(results, list) + if isinstance(results[0], np.ndarray): + pass + else: + raise TypeError('invalid type of prediction results') + + if isinstance(cfg.data.test, dict): + cfg.data.test.test_mode = True + elif isinstance(cfg.data.test, list): + for ds_cfg in cfg.data.test: + ds_cfg.test_mode = True + + dataset = build_dataset(cfg.data.test) + confusion_matrix = calculate_confusion_matrix(dataset, results) + plot_confusion_matrix( + confusion_matrix, + dataset.CLASSES, + save_dir=args.save_dir, + show=args.show, + title=args.title, + color_theme=args.color_theme) + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/chase_db1.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/chase_db1.py new file mode 100644 index 0000000..580e6e7 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/chase_db1.py @@ -0,0 +1,88 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import argparse +import os +import os.path as osp +import tempfile +import zipfile + +import mmcv + +CHASE_DB1_LEN = 28 * 3 +TRAINING_LEN = 60 + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert CHASE_DB1 dataset to mmsegmentation format') + parser.add_argument('dataset_path', help='path of CHASEDB1.zip') + parser.add_argument('--tmp_dir', help='path of the temporary directory') + parser.add_argument('-o', '--out_dir', help='output path') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + dataset_path = args.dataset_path + if args.out_dir is None: + out_dir = osp.join('data', 'CHASE_DB1') + else: + out_dir = args.out_dir + + print('Making directories...') + mmcv.mkdir_or_exist(out_dir) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'training')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + print('Extracting CHASEDB1.zip...') + zip_file = zipfile.ZipFile(dataset_path) + zip_file.extractall(tmp_dir) + + print('Generating training dataset...') + + assert len(os.listdir(tmp_dir)) == CHASE_DB1_LEN, \ + 'len(os.listdir(tmp_dir)) != {}'.format(CHASE_DB1_LEN) + + for img_name in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]: + img = mmcv.imread(osp.join(tmp_dir, img_name)) + if osp.splitext(img_name)[1] == '.jpg': + mmcv.imwrite( + img, + osp.join(out_dir, 'images', 'training', + osp.splitext(img_name)[0] + '.png')) + else: + # The annotation img should be divided by 128, because some of + # the annotation imgs are not standard. We should set a + # threshold to convert the nonstandard annotation imgs. The + # value divided by 128 is equivalent to '1 if value >= 128 + # else 0' + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'training', + osp.splitext(img_name)[0] + '.png')) + + for img_name in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]: + img = mmcv.imread(osp.join(tmp_dir, img_name)) + if osp.splitext(img_name)[1] == '.jpg': + mmcv.imwrite( + img, + osp.join(out_dir, 'images', 'validation', + osp.splitext(img_name)[0] + '.png')) + else: + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'validation', + osp.splitext(img_name)[0] + '.png')) + + print('Removing the temporary files...') + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/cityscapes.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/cityscapes.py new file mode 100644 index 0000000..17b6168 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/cityscapes.py @@ -0,0 +1,56 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
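+# Example invocation (a sketch; requires the `cityscapesscripts` package
+# imported below):
+#
+#   python tools/convert_datasets/cityscapes.py data/cityscapes \
+#       --gt-dir gtFine --nproc 8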
+import argparse +import os.path as osp + +import mmcv +from cityscapesscripts.preparation.json2labelImg import json2labelImg + + +def convert_json_to_label(json_file): + label_file = json_file.replace('_polygons.json', '_labelTrainIds.png') + json2labelImg(json_file, label_file, 'trainIds') + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert Cityscapes annotations to TrainIds') + parser.add_argument('cityscapes_path', help='cityscapes data path') + parser.add_argument('--gt-dir', default='gtFine', type=str) + parser.add_argument('-o', '--out-dir', help='output path') + parser.add_argument( + '--nproc', default=1, type=int, help='number of process') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + cityscapes_path = args.cityscapes_path + out_dir = args.out_dir if args.out_dir else cityscapes_path + mmcv.mkdir_or_exist(out_dir) + + gt_dir = osp.join(cityscapes_path, args.gt_dir) + + poly_files = [] + for poly in mmcv.scandir(gt_dir, '_polygons.json', recursive=True): + poly_file = osp.join(gt_dir, poly) + poly_files.append(poly_file) + if args.nproc > 1: + mmcv.track_parallel_progress(convert_json_to_label, poly_files, + args.nproc) + else: + mmcv.track_progress(convert_json_to_label, poly_files) + + split_names = ['train', 'val', 'test'] + + for split in split_names: + filenames = [] + for poly in mmcv.scandir( + osp.join(gt_dir, split), '_polygons.json', recursive=True): + filenames.append(poly.replace('_gtFine_polygons.json', '')) + with open(osp.join(out_dir, f'{split}.txt'), 'w') as f: + f.writelines(f + '\n' for f in filenames) + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/coco_stuff10k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/coco_stuff10k.py new file mode 100644 index 0000000..374f819 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/coco_stuff10k.py @@ -0,0 +1,307 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
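+# Example invocation (a sketch; the path is expected to contain the
+# 10k release's `images/`, `annotations/` and `imageLists/` folders):
+#
+#   python tools/convert_datasets/coco_stuff10k.py data/coco_stuff10k \
+#       --nproc 8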
+import argparse +import os.path as osp +import shutil +from functools import partial + +import mmcv +import numpy as np +from PIL import Image +from scipy.io import loadmat + +COCO_LEN = 10000 + +clsID_to_trID = { + 0: 0, + 1: 1, + 2: 2, + 3: 3, + 4: 4, + 5: 5, + 6: 6, + 7: 7, + 8: 8, + 9: 9, + 10: 10, + 11: 11, + 13: 12, + 14: 13, + 15: 14, + 16: 15, + 17: 16, + 18: 17, + 19: 18, + 20: 19, + 21: 20, + 22: 21, + 23: 22, + 24: 23, + 25: 24, + 27: 25, + 28: 26, + 31: 27, + 32: 28, + 33: 29, + 34: 30, + 35: 31, + 36: 32, + 37: 33, + 38: 34, + 39: 35, + 40: 36, + 41: 37, + 42: 38, + 43: 39, + 44: 40, + 46: 41, + 47: 42, + 48: 43, + 49: 44, + 50: 45, + 51: 46, + 52: 47, + 53: 48, + 54: 49, + 55: 50, + 56: 51, + 57: 52, + 58: 53, + 59: 54, + 60: 55, + 61: 56, + 62: 57, + 63: 58, + 64: 59, + 65: 60, + 67: 61, + 70: 62, + 72: 63, + 73: 64, + 74: 65, + 75: 66, + 76: 67, + 77: 68, + 78: 69, + 79: 70, + 80: 71, + 81: 72, + 82: 73, + 84: 74, + 85: 75, + 86: 76, + 87: 77, + 88: 78, + 89: 79, + 90: 80, + 92: 81, + 93: 82, + 94: 83, + 95: 84, + 96: 85, + 97: 86, + 98: 87, + 99: 88, + 100: 89, + 101: 90, + 102: 91, + 103: 92, + 104: 93, + 105: 94, + 106: 95, + 107: 96, + 108: 97, + 109: 98, + 110: 99, + 111: 100, + 112: 101, + 113: 102, + 114: 103, + 115: 104, + 116: 105, + 117: 106, + 118: 107, + 119: 108, + 120: 109, + 121: 110, + 122: 111, + 123: 112, + 124: 113, + 125: 114, + 126: 115, + 127: 116, + 128: 117, + 129: 118, + 130: 119, + 131: 120, + 132: 121, + 133: 122, + 134: 123, + 135: 124, + 136: 125, + 137: 126, + 138: 127, + 139: 128, + 140: 129, + 141: 130, + 142: 131, + 143: 132, + 144: 133, + 145: 134, + 146: 135, + 147: 136, + 148: 137, + 149: 138, + 150: 139, + 151: 140, + 152: 141, + 153: 142, + 154: 143, + 155: 144, + 156: 145, + 157: 146, + 158: 147, + 159: 148, + 160: 149, + 161: 150, + 162: 151, + 163: 152, + 164: 153, + 165: 154, + 166: 155, + 167: 156, + 168: 157, + 169: 158, + 170: 159, + 171: 160, + 172: 161, + 173: 162, + 174: 163, + 175: 164, + 176: 165, + 177: 166, + 178: 167, + 179: 168, + 180: 169, + 181: 170, + 182: 171 +} + + +def convert_to_trainID(tuple_path, in_img_dir, in_ann_dir, out_img_dir, + out_mask_dir, is_train): + imgpath, maskpath = tuple_path + shutil.copyfile( + osp.join(in_img_dir, imgpath), + osp.join(out_img_dir, 'train2014', imgpath) if is_train else osp.join( + out_img_dir, 'test2014', imgpath)) + annotate = loadmat(osp.join(in_ann_dir, maskpath)) + mask = annotate['S'].astype(np.uint8) + mask_copy = mask.copy() + for clsID, trID in clsID_to_trID.items(): + mask_copy[mask == clsID] = trID + seg_filename = osp.join(out_mask_dir, 'train2014', + maskpath.split('.')[0] + + '_labelTrainIds.png') if is_train else osp.join( + out_mask_dir, 'test2014', + maskpath.split('.')[0] + '_labelTrainIds.png') + Image.fromarray(mask_copy).save(seg_filename, 'PNG') + + +def generate_coco_list(folder): + train_list = osp.join(folder, 'imageLists', 'train.txt') + test_list = osp.join(folder, 'imageLists', 'test.txt') + train_paths = [] + test_paths = [] + + with open(train_list) as f: + for filename in f: + basename = filename.strip() + imgpath = basename + '.jpg' + maskpath = basename + '.mat' + train_paths.append((imgpath, maskpath)) + + with open(test_list) as f: + for filename in f: + basename = filename.strip() + imgpath = basename + '.jpg' + maskpath = basename + '.mat' + test_paths.append((imgpath, maskpath)) + + return train_paths, test_paths + + +def parse_args(): + parser = argparse.ArgumentParser( + description=\ + 'Convert COCO Stuff 10k annotations to 
mmsegmentation format') # noqa + parser.add_argument('coco_path', help='coco stuff path') + parser.add_argument('-o', '--out_dir', help='output path') + parser.add_argument( + '--nproc', default=16, type=int, help='number of process') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + coco_path = args.coco_path + nproc = args.nproc + + out_dir = args.out_dir or coco_path + out_img_dir = osp.join(out_dir, 'images') + out_mask_dir = osp.join(out_dir, 'annotations') + + mmcv.mkdir_or_exist(osp.join(out_img_dir, 'train2014')) + mmcv.mkdir_or_exist(osp.join(out_img_dir, 'test2014')) + mmcv.mkdir_or_exist(osp.join(out_mask_dir, 'train2014')) + mmcv.mkdir_or_exist(osp.join(out_mask_dir, 'test2014')) + + train_list, test_list = generate_coco_list(coco_path) + assert (len(train_list) + + len(test_list)) == COCO_LEN, 'Wrong length of list {} & {}'.format( + len(train_list), len(test_list)) + + if args.nproc > 1: + mmcv.track_parallel_progress( + partial( + convert_to_trainID, + in_img_dir=osp.join(coco_path, 'images'), + in_ann_dir=osp.join(coco_path, 'annotations'), + out_img_dir=out_img_dir, + out_mask_dir=out_mask_dir, + is_train=True), + train_list, + nproc=nproc) + mmcv.track_parallel_progress( + partial( + convert_to_trainID, + in_img_dir=osp.join(coco_path, 'images'), + in_ann_dir=osp.join(coco_path, 'annotations'), + out_img_dir=out_img_dir, + out_mask_dir=out_mask_dir, + is_train=False), + test_list, + nproc=nproc) + else: + mmcv.track_progress( + partial( + convert_to_trainID, + in_img_dir=osp.join(coco_path, 'images'), + in_ann_dir=osp.join(coco_path, 'annotations'), + out_img_dir=out_img_dir, + out_mask_dir=out_mask_dir, + is_train=True), train_list) + mmcv.track_progress( + partial( + convert_to_trainID, + in_img_dir=osp.join(coco_path, 'images'), + in_ann_dir=osp.join(coco_path, 'annotations'), + out_img_dir=out_img_dir, + out_mask_dir=out_mask_dir, + is_train=False), test_list) + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/coco_stuff164k.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/coco_stuff164k.py new file mode 100644 index 0000000..6d8e2f2 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/coco_stuff164k.py @@ -0,0 +1,264 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
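+# Editor's note: usage sketch (the path is illustrative); the script expects
+# annotations/{train2017,val2017}/*.png and images/ under the given root, as
+# globbed below:
+#
+#   python tools/convert_datasets/coco_stuff164k.py data/coco_stuff164k --nproc 8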
+import argparse +import os.path as osp +import shutil +from functools import partial +from glob import glob + +import mmcv +import numpy as np +from PIL import Image + +COCO_LEN = 123287 + +clsID_to_trID = { + 0: 0, + 1: 1, + 2: 2, + 3: 3, + 4: 4, + 5: 5, + 6: 6, + 7: 7, + 8: 8, + 9: 9, + 10: 10, + 12: 11, + 13: 12, + 14: 13, + 15: 14, + 16: 15, + 17: 16, + 18: 17, + 19: 18, + 20: 19, + 21: 20, + 22: 21, + 23: 22, + 24: 23, + 26: 24, + 27: 25, + 30: 26, + 31: 27, + 32: 28, + 33: 29, + 34: 30, + 35: 31, + 36: 32, + 37: 33, + 38: 34, + 39: 35, + 40: 36, + 41: 37, + 42: 38, + 43: 39, + 45: 40, + 46: 41, + 47: 42, + 48: 43, + 49: 44, + 50: 45, + 51: 46, + 52: 47, + 53: 48, + 54: 49, + 55: 50, + 56: 51, + 57: 52, + 58: 53, + 59: 54, + 60: 55, + 61: 56, + 62: 57, + 63: 58, + 64: 59, + 66: 60, + 69: 61, + 71: 62, + 72: 63, + 73: 64, + 74: 65, + 75: 66, + 76: 67, + 77: 68, + 78: 69, + 79: 70, + 80: 71, + 81: 72, + 83: 73, + 84: 74, + 85: 75, + 86: 76, + 87: 77, + 88: 78, + 89: 79, + 91: 80, + 92: 81, + 93: 82, + 94: 83, + 95: 84, + 96: 85, + 97: 86, + 98: 87, + 99: 88, + 100: 89, + 101: 90, + 102: 91, + 103: 92, + 104: 93, + 105: 94, + 106: 95, + 107: 96, + 108: 97, + 109: 98, + 110: 99, + 111: 100, + 112: 101, + 113: 102, + 114: 103, + 115: 104, + 116: 105, + 117: 106, + 118: 107, + 119: 108, + 120: 109, + 121: 110, + 122: 111, + 123: 112, + 124: 113, + 125: 114, + 126: 115, + 127: 116, + 128: 117, + 129: 118, + 130: 119, + 131: 120, + 132: 121, + 133: 122, + 134: 123, + 135: 124, + 136: 125, + 137: 126, + 138: 127, + 139: 128, + 140: 129, + 141: 130, + 142: 131, + 143: 132, + 144: 133, + 145: 134, + 146: 135, + 147: 136, + 148: 137, + 149: 138, + 150: 139, + 151: 140, + 152: 141, + 153: 142, + 154: 143, + 155: 144, + 156: 145, + 157: 146, + 158: 147, + 159: 148, + 160: 149, + 161: 150, + 162: 151, + 163: 152, + 164: 153, + 165: 154, + 166: 155, + 167: 156, + 168: 157, + 169: 158, + 170: 159, + 171: 160, + 172: 161, + 173: 162, + 174: 163, + 175: 164, + 176: 165, + 177: 166, + 178: 167, + 179: 168, + 180: 169, + 181: 170, + 255: 255 +} + + +def convert_to_trainID(maskpath, out_mask_dir, is_train): + mask = np.array(Image.open(maskpath)) + mask_copy = mask.copy() + for clsID, trID in clsID_to_trID.items(): + mask_copy[mask == clsID] = trID + seg_filename = osp.join( + out_mask_dir, 'train2017', + osp.basename(maskpath).split('.')[0] + + '_labelTrainIds.png') if is_train else osp.join( + out_mask_dir, 'val2017', + osp.basename(maskpath).split('.')[0] + '_labelTrainIds.png') + Image.fromarray(mask_copy).save(seg_filename, 'PNG') + + +def parse_args(): + parser = argparse.ArgumentParser( + description=\ + 'Convert COCO Stuff 164k annotations to mmsegmentation format') # noqa + parser.add_argument('coco_path', help='coco stuff path') + parser.add_argument('-o', '--out_dir', help='output path') + parser.add_argument( + '--nproc', default=16, type=int, help='number of process') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + coco_path = args.coco_path + nproc = args.nproc + + out_dir = args.out_dir or coco_path + out_img_dir = osp.join(out_dir, 'images') + out_mask_dir = osp.join(out_dir, 'annotations') + + mmcv.mkdir_or_exist(osp.join(out_mask_dir, 'train2017')) + mmcv.mkdir_or_exist(osp.join(out_mask_dir, 'val2017')) + + if out_dir != coco_path: + shutil.copytree(osp.join(coco_path, 'images'), out_img_dir) + + train_list = glob(osp.join(coco_path, 'annotations', 'train2017', '*.png')) + train_list = [file for file in train_list if '_labelTrainIds' not in file] + 
test_list = glob(osp.join(coco_path, 'annotations', 'val2017', '*.png')) + test_list = [file for file in test_list if '_labelTrainIds' not in file] + assert (len(train_list) + + len(test_list)) == COCO_LEN, 'Wrong length of list {} & {}'.format( + len(train_list), len(test_list)) + + if args.nproc > 1: + mmcv.track_parallel_progress( + partial( + convert_to_trainID, out_mask_dir=out_mask_dir, is_train=True), + train_list, + nproc=nproc) + mmcv.track_parallel_progress( + partial( + convert_to_trainID, out_mask_dir=out_mask_dir, is_train=False), + test_list, + nproc=nproc) + else: + mmcv.track_progress( + partial( + convert_to_trainID, out_mask_dir=out_mask_dir, is_train=True), + train_list) + mmcv.track_progress( + partial( + convert_to_trainID, out_mask_dir=out_mask_dir, is_train=False), + test_list) + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/drive.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/drive.py new file mode 100644 index 0000000..f547579 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/drive.py @@ -0,0 +1,113 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os +import os.path as osp +import tempfile +import zipfile + +import cv2 +import mmcv + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert DRIVE dataset to mmsegmentation format') + parser.add_argument( + 'training_path', help='the training part of DRIVE dataset') + parser.add_argument( + 'testing_path', help='the testing part of DRIVE dataset') + parser.add_argument('--tmp_dir', help='path of the temporary directory') + parser.add_argument('-o', '--out_dir', help='output path') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + training_path = args.training_path + testing_path = args.testing_path + if args.out_dir is None: + out_dir = osp.join('data', 'DRIVE') + else: + out_dir = args.out_dir + + print('Making directories...') + mmcv.mkdir_or_exist(out_dir) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'training')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + print('Extracting training.zip...') + zip_file = zipfile.ZipFile(training_path) + zip_file.extractall(tmp_dir) + + print('Generating training dataset...') + now_dir = osp.join(tmp_dir, 'training', 'images') + for img_name in os.listdir(now_dir): + img = mmcv.imread(osp.join(now_dir, img_name)) + mmcv.imwrite( + img, + osp.join( + out_dir, 'images', 'training', + osp.splitext(img_name)[0].replace('_training', '') + + '.png')) + + now_dir = osp.join(tmp_dir, 'training', '1st_manual') + for img_name in os.listdir(now_dir): + cap = cv2.VideoCapture(osp.join(now_dir, img_name)) + ret, img = cap.read() + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'training', + osp.splitext(img_name)[0] + '.png')) + + print('Extracting test.zip...') + zip_file = zipfile.ZipFile(testing_path) + zip_file.extractall(tmp_dir) + + print('Generating validation dataset...') + now_dir = osp.join(tmp_dir, 'test', 'images') + for img_name in os.listdir(now_dir): + img = 
mmcv.imread(osp.join(now_dir, img_name)) + mmcv.imwrite( + img, + osp.join( + out_dir, 'images', 'validation', + osp.splitext(img_name)[0].replace('_test', '') + '.png')) + + now_dir = osp.join(tmp_dir, 'test', '1st_manual') + if osp.exists(now_dir): + for img_name in os.listdir(now_dir): + cap = cv2.VideoCapture(osp.join(now_dir, img_name)) + ret, img = cap.read() + # The annotation img should be divided by 128, because some of + # the annotation imgs are not standard. We should set a + # threshold to convert the nonstandard annotation imgs. The + # value divided by 128 is equivalent to '1 if value >= 128 + # else 0' + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'validation', + osp.splitext(img_name)[0] + '.png')) + + now_dir = osp.join(tmp_dir, 'test', '2nd_manual') + if osp.exists(now_dir): + for img_name in os.listdir(now_dir): + cap = cv2.VideoCapture(osp.join(now_dir, img_name)) + ret, img = cap.read() + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'validation', + osp.splitext(img_name)[0] + '.png')) + + print('Removing the temporary files...') + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/hrf.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/hrf.py new file mode 100644 index 0000000..5e016e3 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/hrf.py @@ -0,0 +1,111 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os +import os.path as osp +import tempfile +import zipfile + +import mmcv + +HRF_LEN = 15 +TRAINING_LEN = 5 + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert HRF dataset to mmsegmentation format') + parser.add_argument('healthy_path', help='the path of healthy.zip') + parser.add_argument( + 'healthy_manualsegm_path', help='the path of healthy_manualsegm.zip') + parser.add_argument('glaucoma_path', help='the path of glaucoma.zip') + parser.add_argument( + 'glaucoma_manualsegm_path', help='the path of glaucoma_manualsegm.zip') + parser.add_argument( + 'diabetic_retinopathy_path', + help='the path of diabetic_retinopathy.zip') + parser.add_argument( + 'diabetic_retinopathy_manualsegm_path', + help='the path of diabetic_retinopathy_manualsegm.zip') + parser.add_argument('--tmp_dir', help='path of the temporary directory') + parser.add_argument('-o', '--out_dir', help='output path') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + images_path = [ + args.healthy_path, args.glaucoma_path, args.diabetic_retinopathy_path + ] + annotations_path = [ + args.healthy_manualsegm_path, args.glaucoma_manualsegm_path, + args.diabetic_retinopathy_manualsegm_path + ] + if args.out_dir is None: + out_dir = osp.join('data', 'HRF') + else: + out_dir = args.out_dir + + print('Making directories...') + mmcv.mkdir_or_exist(out_dir) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'training')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) + + print('Generating images...') + for now_path in images_path: + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + zip_file = zipfile.ZipFile(now_path) + zip_file.extractall(tmp_dir) + 
+ assert len(os.listdir(tmp_dir)) == HRF_LEN, \ + 'len(os.listdir(tmp_dir)) != {}'.format(HRF_LEN) + + for filename in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]: + img = mmcv.imread(osp.join(tmp_dir, filename)) + mmcv.imwrite( + img, + osp.join(out_dir, 'images', 'training', + osp.splitext(filename)[0] + '.png')) + for filename in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]: + img = mmcv.imread(osp.join(tmp_dir, filename)) + mmcv.imwrite( + img, + osp.join(out_dir, 'images', 'validation', + osp.splitext(filename)[0] + '.png')) + + print('Generating annotations...') + for now_path in annotations_path: + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + zip_file = zipfile.ZipFile(now_path) + zip_file.extractall(tmp_dir) + + assert len(os.listdir(tmp_dir)) == HRF_LEN, \ + 'len(os.listdir(tmp_dir)) != {}'.format(HRF_LEN) + + for filename in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]: + img = mmcv.imread(osp.join(tmp_dir, filename)) + # The annotation img should be divided by 128, because some of + # the annotation imgs are not standard. We should set a + # threshold to convert the nonstandard annotation imgs. The + # value divided by 128 is equivalent to '1 if value >= 128 + # else 0' + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'training', + osp.splitext(filename)[0] + '.png')) + for filename in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]: + img = mmcv.imread(osp.join(tmp_dir, filename)) + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'validation', + osp.splitext(filename)[0] + '.png')) + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/isaid.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/isaid.py new file mode 100644 index 0000000..314fb89 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/isaid.py @@ -0,0 +1,245 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
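+# Editor's note: usage sketch (the path is illustrative); the script expects
+# <dataset_path>/{train,val,test}/images/*.zip and, for train/val,
+# <dataset_path>/{train,val}/Semantic_masks/*.zip, as globbed below:
+#
+#   python tools/convert_datasets/isaid.py data/iSAID \
+#       --patch_width 896 --patch_height 896 --overlap_area 384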
+import argparse +import glob +import os +import os.path as osp +import shutil +import tempfile +import zipfile + +import mmcv +import numpy as np +from PIL import Image + +iSAID_palette = \ + { + 0: (0, 0, 0), + 1: (0, 0, 63), + 2: (0, 63, 63), + 3: (0, 63, 0), + 4: (0, 63, 127), + 5: (0, 63, 191), + 6: (0, 63, 255), + 7: (0, 127, 63), + 8: (0, 127, 127), + 9: (0, 0, 127), + 10: (0, 0, 191), + 11: (0, 0, 255), + 12: (0, 191, 127), + 13: (0, 127, 191), + 14: (0, 127, 255), + 15: (0, 100, 155) + } + +iSAID_invert_palette = {v: k for k, v in iSAID_palette.items()} + + +def iSAID_convert_from_color(arr_3d, palette=iSAID_invert_palette): + """RGB-color encoding to grayscale labels.""" + arr_2d = np.zeros((arr_3d.shape[0], arr_3d.shape[1]), dtype=np.uint8) + + for c, i in palette.items(): + m = np.all(arr_3d == np.array(c).reshape(1, 1, 3), axis=2) + arr_2d[m] = i + + return arr_2d + + +def slide_crop_image(src_path, out_dir, mode, patch_H, patch_W, overlap): + img = np.asarray(Image.open(src_path).convert('RGB')) + + img_H, img_W, _ = img.shape + + if img_H < patch_H and img_W > patch_W: + + img = mmcv.impad(img, shape=(patch_H, img_W), pad_val=0) + + img_H, img_W, _ = img.shape + + elif img_H > patch_H and img_W < patch_W: + + img = mmcv.impad(img, shape=(img_H, patch_W), pad_val=0) + + img_H, img_W, _ = img.shape + + elif img_H < patch_H and img_W < patch_W: + + img = mmcv.impad(img, shape=(patch_H, patch_W), pad_val=0) + + img_H, img_W, _ = img.shape + + for x in range(0, img_W, patch_W - overlap): + for y in range(0, img_H, patch_H - overlap): + x_str = x + x_end = x + patch_W + if x_end > img_W: + diff_x = x_end - img_W + x_str -= diff_x + x_end = img_W + y_str = y + y_end = y + patch_H + if y_end > img_H: + diff_y = y_end - img_H + y_str -= diff_y + y_end = img_H + + img_patch = img[y_str:y_end, x_str:x_end, :] + img_patch = Image.fromarray(img_patch.astype(np.uint8)) + image = osp.basename(src_path).split('.')[0] + '_' + str( + y_str) + '_' + str(y_end) + '_' + str(x_str) + '_' + str( + x_end) + '.png' + # print(image) + save_path_image = osp.join(out_dir, 'img_dir', mode, str(image)) + img_patch.save(save_path_image) + + +def slide_crop_label(src_path, out_dir, mode, patch_H, patch_W, overlap): + label = mmcv.imread(src_path, channel_order='rgb') + label = iSAID_convert_from_color(label) + img_H, img_W = label.shape + + if img_H < patch_H and img_W > patch_W: + + label = mmcv.impad(label, shape=(patch_H, img_W), pad_val=255) + + img_H = patch_H + + elif img_H > patch_H and img_W < patch_W: + + label = mmcv.impad(label, shape=(img_H, patch_W), pad_val=255) + + img_W = patch_W + + elif img_H < patch_H and img_W < patch_W: + + label = mmcv.impad(label, shape=(patch_H, patch_W), pad_val=255) + + img_H = patch_H + img_W = patch_W + + for x in range(0, img_W, patch_W - overlap): + for y in range(0, img_H, patch_H - overlap): + x_str = x + x_end = x + patch_W + if x_end > img_W: + diff_x = x_end - img_W + x_str -= diff_x + x_end = img_W + y_str = y + y_end = y + patch_H + if y_end > img_H: + diff_y = y_end - img_H + y_str -= diff_y + y_end = img_H + + lab_patch = label[y_str:y_end, x_str:x_end] + lab_patch = Image.fromarray(lab_patch.astype(np.uint8), mode='P') + + image = osp.basename(src_path).split('.')[0].split( + '_')[0] + '_' + str(y_str) + '_' + str(y_end) + '_' + str( + x_str) + '_' + str(x_end) + '_instance_color_RGB' + '.png' + lab_patch.save(osp.join(out_dir, 'ann_dir', mode, str(image))) + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert iSAID 
dataset to mmsegmentation format') + parser.add_argument('dataset_path', help='iSAID folder path') + parser.add_argument('--tmp_dir', help='path of the temporary directory') + parser.add_argument('-o', '--out_dir', help='output path') + + parser.add_argument( + '--patch_width', + default=896, + type=int, + help='Width of the cropped image patch') + parser.add_argument( + '--patch_height', + default=896, + type=int, + help='Height of the cropped image patch') + parser.add_argument( + '--overlap_area', default=384, type=int, help='Overlap area') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + dataset_path = args.dataset_path + # image patch width and height + patch_H, patch_W = args.patch_width, args.patch_height + + overlap = args.overlap_area # overlap area + + if args.out_dir is None: + out_dir = osp.join('data', 'iSAID') + else: + out_dir = args.out_dir + + print('Making directories...') + mmcv.mkdir_or_exist(osp.join(out_dir, 'img_dir', 'train')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'img_dir', 'val')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'img_dir', 'test')) + + mmcv.mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'train')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'val')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'test')) + + assert os.path.exists(os.path.join(dataset_path, 'train')), \ + 'train is not in {}'.format(dataset_path) + assert os.path.exists(os.path.join(dataset_path, 'val')), \ + 'val is not in {}'.format(dataset_path) + assert os.path.exists(os.path.join(dataset_path, 'test')), \ + 'test is not in {}'.format(dataset_path) + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + for dataset_mode in ['train', 'val', 'test']: + + # for dataset_mode in [ 'test']: + print('Extracting {}ing.zip...'.format(dataset_mode)) + img_zipp_list = glob.glob( + os.path.join(dataset_path, dataset_mode, 'images', '*.zip')) + print('Find the data', img_zipp_list) + for img_zipp in img_zipp_list: + zip_file = zipfile.ZipFile(img_zipp) + zip_file.extractall(os.path.join(tmp_dir, dataset_mode, 'img')) + src_path_list = glob.glob( + os.path.join(tmp_dir, dataset_mode, 'img', 'images', '*.png')) + + src_prog_bar = mmcv.ProgressBar(len(src_path_list)) + for i, img_path in enumerate(src_path_list): + if dataset_mode != 'test': + slide_crop_image(img_path, out_dir, dataset_mode, patch_H, + patch_W, overlap) + + else: + shutil.move(img_path, + os.path.join(out_dir, 'img_dir', dataset_mode)) + src_prog_bar.update() + + if dataset_mode != 'test': + label_zipp_list = glob.glob( + os.path.join(dataset_path, dataset_mode, 'Semantic_masks', + '*.zip')) + for label_zipp in label_zipp_list: + zip_file = zipfile.ZipFile(label_zipp) + zip_file.extractall( + os.path.join(tmp_dir, dataset_mode, 'lab')) + + lab_path_list = glob.glob( + os.path.join(tmp_dir, dataset_mode, 'lab', 'images', + '*.png')) + lab_prog_bar = mmcv.ProgressBar(len(lab_path_list)) + for i, lab_path in enumerate(lab_path_list): + slide_crop_label(lab_path, out_dir, dataset_mode, patch_H, + patch_W, overlap) + lab_prog_bar.update() + + print('Removing the temporary files...') + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/loveda.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/loveda.py new file mode 100644 index 0000000..3a06268 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/loveda.py @@ -0,0 +1,73 @@ +# Copyright 
(c) OpenMMLab. All rights reserved. +import argparse +import os +import os.path as osp +import shutil +import tempfile +import zipfile + +import mmcv + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert LoveDA dataset to mmsegmentation format') + parser.add_argument('dataset_path', help='LoveDA folder path') + parser.add_argument('--tmp_dir', help='path of the temporary directory') + parser.add_argument('-o', '--out_dir', help='output path') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + dataset_path = args.dataset_path + if args.out_dir is None: + out_dir = osp.join('data', 'loveDA') + else: + out_dir = args.out_dir + + print('Making directories...') + mmcv.mkdir_or_exist(out_dir) + mmcv.mkdir_or_exist(osp.join(out_dir, 'img_dir')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'img_dir', 'train')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'img_dir', 'val')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'img_dir', 'test')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'ann_dir')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'train')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'val')) + + assert 'Train.zip' in os.listdir(dataset_path), \ + 'Train.zip is not in {}'.format(dataset_path) + assert 'Val.zip' in os.listdir(dataset_path), \ + 'Val.zip is not in {}'.format(dataset_path) + assert 'Test.zip' in os.listdir(dataset_path), \ + 'Test.zip is not in {}'.format(dataset_path) + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + for dataset in ['Train', 'Val', 'Test']: + zip_file = zipfile.ZipFile( + os.path.join(dataset_path, dataset + '.zip')) + zip_file.extractall(tmp_dir) + data_type = dataset.lower() + for location in ['Rural', 'Urban']: + for image_type in ['images_png', 'masks_png']: + if image_type == 'images_png': + dst = osp.join(out_dir, 'img_dir', data_type) + else: + dst = osp.join(out_dir, 'ann_dir', data_type) + if dataset == 'Test' and image_type == 'masks_png': + continue + else: + src_dir = osp.join(tmp_dir, dataset, location, + image_type) + src_lst = os.listdir(src_dir) + for file in src_lst: + shutil.move(osp.join(src_dir, file), dst) + print('Removing the temporary files...') + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/pascal_context.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/pascal_context.py new file mode 100644 index 0000000..03b79d5 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/pascal_context.py @@ -0,0 +1,87 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
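+# Editor's note: usage sketch; the paths are illustrative and the JSON file
+# name is an assumption (the script only requires a PASCAL-in-Detail
+# annotation json and the `detail` package imported below):
+#
+#   python tools/convert_datasets/pascal_context.py data/VOCdevkit \
+#       data/VOCdevkit/VOC2010/trainval_merged.json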
+import argparse
+import os.path as osp
+from functools import partial
+
+import mmcv
+import numpy as np
+from detail import Detail
+from PIL import Image
+
+_mapping = np.sort(
+    np.array([
+        0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25, 284,
+        158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45, 46, 308, 59,
+        440, 445, 31, 232, 65, 354, 424, 68, 326, 72, 458, 34, 207, 80, 355,
+        85, 347, 220, 349, 360, 98, 187, 104, 105, 366, 189, 368, 113, 115
+    ]))
+_key = np.array(range(len(_mapping))).astype('uint8')
+
+
+def generate_labels(img_id, detail, out_dir):
+
+    def _class_to_index(mask, _mapping, _key):
+        # assert the values
+        values = np.unique(mask)
+        for i in range(len(values)):
+            assert (values[i] in _mapping)
+        index = np.digitize(mask.ravel(), _mapping, right=True)
+        return _key[index].reshape(mask.shape)
+
+    mask = Image.fromarray(
+        _class_to_index(detail.getMask(img_id), _mapping=_mapping, _key=_key))
+    filename = img_id['file_name']
+    mask.save(osp.join(out_dir, filename.replace('jpg', 'png')))
+    return osp.splitext(osp.basename(filename))[0]
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description='Convert PASCAL Context annotations to mmsegmentation '
+        'format')
+    parser.add_argument('devkit_path', help='pascal voc devkit path')
+    parser.add_argument('json_path', help='annotation json file path')
+    parser.add_argument('-o', '--out_dir', help='output path')
+    args = parser.parse_args()
+    return args
+
+
+def main():
+    args = parse_args()
+    devkit_path = args.devkit_path
+    if args.out_dir is None:
+        out_dir = osp.join(devkit_path, 'VOC2010', 'SegmentationClassContext')
+    else:
+        out_dir = args.out_dir
+    json_path = args.json_path
+    mmcv.mkdir_or_exist(out_dir)
+    img_dir = osp.join(devkit_path, 'VOC2010', 'JPEGImages')
+
+    train_detail = Detail(json_path, img_dir, 'train')
+    train_ids = train_detail.getImgs()
+
+    val_detail = Detail(json_path, img_dir, 'val')
+    val_ids = val_detail.getImgs()
+
+    mmcv.mkdir_or_exist(
+        osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext'))
+
+    train_list = mmcv.track_progress(
+        partial(generate_labels, detail=train_detail, out_dir=out_dir),
+        train_ids)
+    with open(
+            osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext',
+                     'train.txt'), 'w') as f:
+        f.writelines(line + '\n' for line in sorted(train_list))
+
+    val_list = mmcv.track_progress(
+        partial(generate_labels, detail=val_detail, out_dir=out_dir), val_ids)
+    with open(
+            osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext',
+                     'val.txt'), 'w') as f:
+        f.writelines(line + '\n' for line in sorted(val_list))
+
+    print('Done!')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/potsdam.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/potsdam.py
new file mode 100644
index 0000000..87e67d5
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/potsdam.py
@@ -0,0 +1,157 @@
+# Copyright (c) OpenMMLab. All rights reserved.
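+# Editor's note: usage sketch (the dataset path is illustrative); the script
+# expects the Potsdam *.zip archives directly under the given folder:
+#
+#   python tools/convert_datasets/potsdam.py data/potsdam \
+#       --clip_size 512 --stride_size 256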
+import argparse
+import glob
+import math
+import os
+import os.path as osp
+import tempfile
+import zipfile
+
+import mmcv
+import numpy as np
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description='Convert potsdam dataset to mmsegmentation format')
+    parser.add_argument('dataset_path', help='potsdam folder path')
+    parser.add_argument('--tmp_dir', help='path of the temporary directory')
+    parser.add_argument('-o', '--out_dir', help='output path')
+    parser.add_argument(
+        '--clip_size',
+        type=int,
+        help='clipped size of image after preparation',
+        default=512)
+    parser.add_argument(
+        '--stride_size',
+        type=int,
+        help='stride of clipping original images',
+        default=256)
+    args = parser.parse_args()
+    return args
+
+
+def clip_big_image(image_path, clip_save_dir, args, to_label=False):
+    # Original Potsdam images are very large, so they are pre-processed into
+    # clipped patches. A grid of clip windows is laid over the image, and
+    # windows that overrun the right or bottom border are shifted back inside
+    # so that every patch is exactly clip_size x clip_size. Since the grid
+    # below steps by the clip size, a 5120 x 5120 image with a clip size of
+    # 512 yields 10 x 10 = 100 distinct patches.
+    image = mmcv.imread(image_path)
+
+    h, w, c = image.shape
+    clip_size = args.clip_size
+    stride_size = args.stride_size
+
+    num_rows = math.ceil((h - clip_size) / stride_size) if math.ceil(
+        (h - clip_size) /
+        stride_size) * stride_size + clip_size >= h else math.ceil(
+            (h - clip_size) / stride_size) + 1
+    num_cols = math.ceil((w - clip_size) / stride_size) if math.ceil(
+        (w - clip_size) /
+        stride_size) * stride_size + clip_size >= w else math.ceil(
+            (w - clip_size) / stride_size) + 1
+
+    x, y = np.meshgrid(np.arange(num_cols + 1), np.arange(num_rows + 1))
+    xmin = x * clip_size
+    ymin = y * clip_size
+
+    xmin = xmin.ravel()
+    ymin = ymin.ravel()
+    xmin_offset = np.where(xmin + clip_size > w, w - xmin - clip_size,
+                           np.zeros_like(xmin))
+    ymin_offset = np.where(ymin + clip_size > h, h - ymin - clip_size,
+                           np.zeros_like(ymin))
+    boxes = np.stack([
+        xmin + xmin_offset, ymin + ymin_offset,
+        np.minimum(xmin + clip_size, w),
+        np.minimum(ymin + clip_size, h)
+    ],
+                     axis=1)
+
+    if to_label:
+        color_map = np.array([[0, 0, 0], [255, 255, 255], [255, 0, 0],
+                              [255, 255, 0], [0, 255, 0], [0, 255, 255],
+                              [0, 0, 255]])
+        flatten_v = np.matmul(
+            image.reshape(-1, c),
+            np.array([2, 3, 4]).reshape(3, 1))
+        out = np.zeros_like(flatten_v)
+        for idx, class_color in enumerate(color_map):
+            value_idx = np.matmul(class_color,
+                                  np.array([2, 3, 4]).reshape(3, 1))
+            out[flatten_v == value_idx] = idx
+        image = out.reshape(h, w)
+
+    for box in boxes:
+        start_x, start_y, end_x, end_y = box
+        clipped_image = image[start_y:end_y,
+                              start_x:end_x] if to_label else image[
+                                  start_y:end_y, start_x:end_x, :]
+        idx_i, idx_j = osp.basename(image_path).split('_')[2:4]
+        mmcv.imwrite(
+            clipped_image.astype(np.uint8),
+            osp.join(
+                clip_save_dir,
+                f'{idx_i}_{idx_j}_{start_x}_{start_y}_{end_x}_{end_y}.png'))
+
+
+def main():
+    args = parse_args()
+    splits = {
+        'train': [
+            '2_10', '2_11', '2_12', '3_10', '3_11', '3_12', '4_10', '4_11',
+            '4_12', '5_10', '5_11', '5_12', '6_10', '6_11', '6_12', '6_7',
+            '6_8', '6_9', '7_10', '7_11', '7_12', '7_7', '7_8', '7_9'
+        ],
+        'val': [
+            '5_15', '6_15', '6_13', '3_13', '4_14', '6_14', '5_14', '2_13',
+            '4_15', '2_14', '5_13', '4_13', '3_14', '7_13'
+        ]
+    }
+
+    dataset_path = args.dataset_path
+    if args.out_dir is None:
+        out_dir = osp.join('data', 'potsdam')
+    else:
+        out_dir =
args.out_dir + + print('Making directories...') + mmcv.mkdir_or_exist(osp.join(out_dir, 'img_dir', 'train')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'img_dir', 'val')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'train')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'val')) + + zipp_list = glob.glob(os.path.join(dataset_path, '*.zip')) + print('Find the data', zipp_list) + + for zipp in zipp_list: + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + zip_file = zipfile.ZipFile(zipp) + zip_file.extractall(tmp_dir) + src_path_list = glob.glob(os.path.join(tmp_dir, '*.tif')) + if not len(src_path_list): + sub_tmp_dir = os.path.join(tmp_dir, os.listdir(tmp_dir)[0]) + src_path_list = glob.glob(os.path.join(sub_tmp_dir, '*.tif')) + + prog_bar = mmcv.ProgressBar(len(src_path_list)) + for i, src_path in enumerate(src_path_list): + idx_i, idx_j = osp.basename(src_path).split('_')[2:4] + data_type = 'train' if f'{idx_i}_{idx_j}' in splits[ + 'train'] else 'val' + if 'label' in src_path: + dst_dir = osp.join(out_dir, 'ann_dir', data_type) + clip_big_image(src_path, dst_dir, args, to_label=True) + else: + dst_dir = osp.join(out_dir, 'img_dir', data_type) + clip_big_image(src_path, dst_dir, args, to_label=False) + prog_bar.update() + + print('Removing the temporary files...') + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/stare.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/stare.py new file mode 100644 index 0000000..29b78c0 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/stare.py @@ -0,0 +1,166 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import gzip +import os +import os.path as osp +import tarfile +import tempfile + +import mmcv + +STARE_LEN = 20 +TRAINING_LEN = 10 + + +def un_gz(src, dst): + g_file = gzip.GzipFile(src) + with open(dst, 'wb+') as f: + f.write(g_file.read()) + g_file.close() + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert STARE dataset to mmsegmentation format') + parser.add_argument('image_path', help='the path of stare-images.tar') + parser.add_argument('labels_ah', help='the path of labels-ah.tar') + parser.add_argument('labels_vk', help='the path of labels-vk.tar') + parser.add_argument('--tmp_dir', help='path of the temporary directory') + parser.add_argument('-o', '--out_dir', help='output path') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + image_path = args.image_path + labels_ah = args.labels_ah + labels_vk = args.labels_vk + if args.out_dir is None: + out_dir = osp.join('data', 'STARE') + else: + out_dir = args.out_dir + + print('Making directories...') + mmcv.mkdir_or_exist(out_dir) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'training')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) + mmcv.mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + mmcv.mkdir_or_exist(osp.join(tmp_dir, 'gz')) + mmcv.mkdir_or_exist(osp.join(tmp_dir, 'files')) + + print('Extracting stare-images.tar...') + with tarfile.open(image_path) as f: + f.extractall(osp.join(tmp_dir, 'gz')) + + for filename in os.listdir(osp.join(tmp_dir, 'gz')): + 
un_gz(
+                osp.join(tmp_dir, 'gz', filename),
+                osp.join(tmp_dir, 'files',
+                         osp.splitext(filename)[0]))
+
+        now_dir = osp.join(tmp_dir, 'files')
+
+        assert len(os.listdir(now_dir)) == STARE_LEN, \
+            'len(os.listdir(now_dir)) != {}'.format(STARE_LEN)
+
+        for filename in sorted(os.listdir(now_dir))[:TRAINING_LEN]:
+            img = mmcv.imread(osp.join(now_dir, filename))
+            mmcv.imwrite(
+                img,
+                osp.join(out_dir, 'images', 'training',
+                         osp.splitext(filename)[0] + '.png'))
+
+        for filename in sorted(os.listdir(now_dir))[TRAINING_LEN:]:
+            img = mmcv.imread(osp.join(now_dir, filename))
+            mmcv.imwrite(
+                img,
+                osp.join(out_dir, 'images', 'validation',
+                         osp.splitext(filename)[0] + '.png'))
+
+        print('Removing the temporary files...')
+
+    with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
+        mmcv.mkdir_or_exist(osp.join(tmp_dir, 'gz'))
+        mmcv.mkdir_or_exist(osp.join(tmp_dir, 'files'))
+
+        print('Extracting labels-ah.tar...')
+        with tarfile.open(labels_ah) as f:
+            f.extractall(osp.join(tmp_dir, 'gz'))
+
+        for filename in os.listdir(osp.join(tmp_dir, 'gz')):
+            un_gz(
+                osp.join(tmp_dir, 'gz', filename),
+                osp.join(tmp_dir, 'files',
+                         osp.splitext(filename)[0]))
+
+        now_dir = osp.join(tmp_dir, 'files')
+
+        assert len(os.listdir(now_dir)) == STARE_LEN, \
+            'len(os.listdir(now_dir)) != {}'.format(STARE_LEN)
+
+        for filename in sorted(os.listdir(now_dir))[:TRAINING_LEN]:
+            img = mmcv.imread(osp.join(now_dir, filename))
+            # Divide the annotation image by 128: some of the annotation
+            # images are not strictly binary, so a threshold is needed to
+            # normalize them. Dividing by 128 is equivalent to
+            # '1 if value >= 128 else 0'.
+            mmcv.imwrite(
+                img[:, :, 0] // 128,
+                osp.join(out_dir, 'annotations', 'training',
+                         osp.splitext(filename)[0] + '.png'))
+
+        for filename in sorted(os.listdir(now_dir))[TRAINING_LEN:]:
+            img = mmcv.imread(osp.join(now_dir, filename))
+            mmcv.imwrite(
+                img[:, :, 0] // 128,
+                osp.join(out_dir, 'annotations', 'validation',
+                         osp.splitext(filename)[0] + '.png'))
+
+        print('Removing the temporary files...')
+
+    with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
+        mmcv.mkdir_or_exist(osp.join(tmp_dir, 'gz'))
+        mmcv.mkdir_or_exist(osp.join(tmp_dir, 'files'))
+
+        print('Extracting labels-vk.tar...')
+        with tarfile.open(labels_vk) as f:
+            f.extractall(osp.join(tmp_dir, 'gz'))
+
+        for filename in os.listdir(osp.join(tmp_dir, 'gz')):
+            un_gz(
+                osp.join(tmp_dir, 'gz', filename),
+                osp.join(tmp_dir, 'files',
+                         osp.splitext(filename)[0]))
+
+        now_dir = osp.join(tmp_dir, 'files')
+
+        assert len(os.listdir(now_dir)) == STARE_LEN, \
+            'len(os.listdir(now_dir)) != {}'.format(STARE_LEN)
+
+        for filename in sorted(os.listdir(now_dir))[:TRAINING_LEN]:
+            img = mmcv.imread(osp.join(now_dir, filename))
+            mmcv.imwrite(
+                img[:, :, 0] // 128,
+                osp.join(out_dir, 'annotations', 'training',
+                         osp.splitext(filename)[0] + '.png'))
+
+        for filename in sorted(os.listdir(now_dir))[TRAINING_LEN:]:
+            img = mmcv.imread(osp.join(now_dir, filename))
+            mmcv.imwrite(
+                img[:, :, 0] // 128,
+                osp.join(out_dir, 'annotations', 'validation',
+                         osp.splitext(filename)[0] + '.png'))
+
+        print('Removing the temporary files...')
+
+    print('Done!')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/vaihingen.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/vaihingen.py
new file mode 100644
index 0000000..b025ae5
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/vaihingen.py
@@ -0,0 +1,155 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import glob
+import math
+import os
+import os.path as osp
+import tempfile
+import zipfile
+
+import mmcv
+import numpy as np
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description='Convert vaihingen dataset to mmsegmentation format')
+    parser.add_argument('dataset_path', help='vaihingen folder path')
+    parser.add_argument('--tmp_dir', help='path of the temporary directory')
+    parser.add_argument('-o', '--out_dir', help='output path')
+    parser.add_argument(
+        '--clip_size',
+        type=int,
+        help='clipped size of image after preparation',
+        default=512)
+    parser.add_argument(
+        '--stride_size',
+        type=int,
+        help='stride of clipping original images',
+        default=256)
+    args = parser.parse_args()
+    return args
+
+
+def clip_big_image(image_path, clip_save_dir, to_label=False):
+    # Original Vaihingen images are very large, so they are pre-processed
+    # into clipped patches. A grid of clip windows is laid over the image,
+    # and windows that overrun the right or bottom border are shifted back
+    # inside so that every patch is exactly clip_size x clip_size. Since the
+    # grid below steps by the clip size, a 5120 x 5120 image with a clip
+    # size of 512 yields 10 x 10 = 100 distinct patches.
+    image = mmcv.imread(image_path)
+
+    h, w, c = image.shape
+    cs = args.clip_size
+    ss = args.stride_size
+
+    num_rows = math.ceil((h - cs) / ss) if math.ceil(
+        (h - cs) / ss) * ss + cs >= h else math.ceil((h - cs) / ss) + 1
+    num_cols = math.ceil((w - cs) / ss) if math.ceil(
+        (w - cs) / ss) * ss + cs >= w else math.ceil((w - cs) / ss) + 1
+
+    x, y = np.meshgrid(np.arange(num_cols + 1), np.arange(num_rows + 1))
+    xmin = x * cs
+    ymin = y * cs
+
+    xmin = xmin.ravel()
+    ymin = ymin.ravel()
+    xmin_offset = np.where(xmin + cs > w, w - xmin - cs, np.zeros_like(xmin))
+    ymin_offset = np.where(ymin + cs > h, h - ymin - cs, np.zeros_like(ymin))
+    boxes = np.stack([
+        xmin + xmin_offset, ymin + ymin_offset,
+        np.minimum(xmin + cs, w),
+        np.minimum(ymin + cs, h)
+    ],
+                     axis=1)
+
+    if to_label:
+        color_map = np.array([[0, 0, 0], [255, 255, 255], [255, 0, 0],
+                              [255, 255, 0], [0, 255, 0], [0, 255, 255],
+                              [0, 0, 255]])
+        flatten_v = np.matmul(
+            image.reshape(-1, c),
+            np.array([2, 3, 4]).reshape(3, 1))
+        out = np.zeros_like(flatten_v)
+        for idx, class_color in enumerate(color_map):
+            value_idx = np.matmul(class_color,
+                                  np.array([2, 3, 4]).reshape(3, 1))
+            out[flatten_v == value_idx] = idx
+        image = out.reshape(h, w)
+
+    for box in boxes:
+        start_x, start_y, end_x, end_y = box
+        clipped_image = image[start_y:end_y,
+                              start_x:end_x] if to_label else image[
+                                  start_y:end_y, start_x:end_x, :]
+        area_idx = osp.basename(image_path).split('_')[3].strip('.tif')
+        mmcv.imwrite(
+            clipped_image.astype(np.uint8),
+            osp.join(clip_save_dir,
+                     f'{area_idx}_{start_x}_{start_y}_{end_x}_{end_y}.png'))
+
+
+def main():
+    splits = {
+        'train': [
+            'area1', 'area11', 'area13', 'area15', 'area17', 'area21',
+            'area23', 'area26', 'area28', 'area3', 'area30', 'area32',
+            'area34', 'area37', 'area5', 'area7'
+        ],
+        'val': [
+            'area6', 'area24', 'area35', 'area16', 'area14', 'area22',
+            'area10', 'area4', 'area2', 'area20', 'area8', 'area31', 'area33',
+            'area27', 'area38', 'area12', 'area29'
+        ],
+    }
+
+    dataset_path = args.dataset_path
+    if args.out_dir is None:
+        out_dir = osp.join('data', 'vaihingen')
+    else:
+        out_dir = args.out_dir
+
+    print('Making directories...')
+    mmcv.mkdir_or_exist(osp.join(out_dir, 'img_dir', 'train'))
+    mmcv.mkdir_or_exist(osp.join(out_dir, 'img_dir', 'val'))
+    mmcv.mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'train'))
+    mmcv.mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'val'))
+
+    zipp_list = glob.glob(os.path.join(dataset_path, '*.zip'))
+    print('Find the data', zipp_list)
+
+    with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
+        for zipp in zipp_list:
+            zip_file = zipfile.ZipFile(zipp)
+            zip_file.extractall(tmp_dir)
+            src_path_list = glob.glob(os.path.join(tmp_dir, '*.tif'))
+            if 'ISPRS_semantic_labeling_Vaihingen' in zipp:
+                src_path_list = glob.glob(
+                    os.path.join(os.path.join(tmp_dir, 'top'), '*.tif'))
+            if 'ISPRS_semantic_labeling_Vaihingen_ground_truth_eroded_COMPLETE' in zipp:  # noqa
+                src_path_list = glob.glob(os.path.join(tmp_dir, '*.tif'))
+                # drop the unused area9 ground truth; build a filtered list
+                # instead of removing items while iterating over the list
+                src_path_list = [
+                    ann for ann in src_path_list if 'area9' not in ann
+                ]
+            prog_bar = mmcv.ProgressBar(len(src_path_list))
+            for i, src_path in enumerate(src_path_list):
+                area_idx = osp.basename(src_path).split('_')[3].strip('.tif')
+                data_type = 'train' if area_idx in splits['train'] else 'val'
+                if 'noBoundary' in src_path:
+                    dst_dir = osp.join(out_dir, 'ann_dir', data_type)
+                    clip_big_image(src_path, dst_dir, to_label=True)
+                else:
+                    dst_dir = osp.join(out_dir, 'img_dir', data_type)
+                    clip_big_image(src_path, dst_dir, to_label=False)
+                prog_bar.update()
+
+    print('Removing the temporary files...')
+
+    print('Done!')
+
+
+if __name__ == '__main__':
+    # args is read as a module-level global by main() and clip_big_image()
+    args = parse_args()
+    main()
diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/voc_aug.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/voc_aug.py
new file mode 100644
index 0000000..1d42c27
--- /dev/null
+++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/convert_datasets/voc_aug.py
@@ -0,0 +1,92 @@
+# Copyright (c) OpenMMLab. All rights reserved.
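+# Editor's note: usage sketch (the paths are illustrative); the script expects
+# <aug_path>/dataset/cls/*.mat plus <aug_path>/dataset/{train,val}.txt and the
+# VOC2012 ImageSets under <devkit_path>, as read below:
+#
+#   python tools/convert_datasets/voc_aug.py data/VOCdevkit data/VOCaug --nproc 8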
+import argparse +import os.path as osp +from functools import partial + +import mmcv +import numpy as np +from PIL import Image +from scipy.io import loadmat + +AUG_LEN = 10582 + + +def convert_mat(mat_file, in_dir, out_dir): + data = loadmat(osp.join(in_dir, mat_file)) + mask = data['GTcls'][0]['Segmentation'][0].astype(np.uint8) + seg_filename = osp.join(out_dir, mat_file.replace('.mat', '.png')) + Image.fromarray(mask).save(seg_filename, 'PNG') + + +def generate_aug_list(merged_list, excluded_list): + return list(set(merged_list) - set(excluded_list)) + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert PASCAL VOC annotations to mmsegmentation format') + parser.add_argument('devkit_path', help='pascal voc devkit path') + parser.add_argument('aug_path', help='pascal voc aug path') + parser.add_argument('-o', '--out_dir', help='output path') + parser.add_argument( + '--nproc', default=1, type=int, help='number of process') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + devkit_path = args.devkit_path + aug_path = args.aug_path + nproc = args.nproc + if args.out_dir is None: + out_dir = osp.join(devkit_path, 'VOC2012', 'SegmentationClassAug') + else: + out_dir = args.out_dir + mmcv.mkdir_or_exist(out_dir) + in_dir = osp.join(aug_path, 'dataset', 'cls') + + mmcv.track_parallel_progress( + partial(convert_mat, in_dir=in_dir, out_dir=out_dir), + list(mmcv.scandir(in_dir, suffix='.mat')), + nproc=nproc) + + full_aug_list = [] + with open(osp.join(aug_path, 'dataset', 'train.txt')) as f: + full_aug_list += [line.strip() for line in f] + with open(osp.join(aug_path, 'dataset', 'val.txt')) as f: + full_aug_list += [line.strip() for line in f] + + with open( + osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', + 'train.txt')) as f: + ori_train_list = [line.strip() for line in f] + with open( + osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', + 'val.txt')) as f: + val_list = [line.strip() for line in f] + + aug_train_list = generate_aug_list(ori_train_list + full_aug_list, + val_list) + assert len(aug_train_list) == AUG_LEN, 'len(aug_train_list) != {}'.format( + AUG_LEN) + + with open( + osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', + 'trainaug.txt'), 'w') as f: + f.writelines(line + '\n' for line in aug_train_list) + + aug_list = generate_aug_list(full_aug_list, ori_train_list + val_list) + assert len(aug_list) == AUG_LEN - len( + ori_train_list), 'len(aug_list) != {}'.format(AUG_LEN - + len(ori_train_list)) + with open( + osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', 'aug.txt'), + 'w') as f: + f.writelines(line + '\n' for line in aug_list) + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/deploy_test.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/deploy_test.py new file mode 100644 index 0000000..eca5430 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/deploy_test.py @@ -0,0 +1,338 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
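+# Editor's note: usage sketch (config and model file names are illustrative);
+# --backend must be one of {onnxruntime, tensorrt}, per parse_args below:
+#
+#   python tools/deploy_test.py configs/some_config.py model.onnx \
+#       --backend onnxruntime --eval mIoU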
+import argparse +import os +import os.path as osp +import shutil +import warnings +from typing import Any, Iterable + +import mmcv +import numpy as np +import torch +from mmcv.parallel import MMDataParallel +from mmcv.runner import get_dist_info +from mmcv.utils import DictAction + +from mmseg.apis import single_gpu_test +from mmseg.datasets import build_dataloader, build_dataset +from mmseg.models.segmentors.base import BaseSegmentor +from mmseg.ops import resize + + +class ONNXRuntimeSegmentor(BaseSegmentor): + + def __init__(self, onnx_file: str, cfg: Any, device_id: int): + super(ONNXRuntimeSegmentor, self).__init__() + import onnxruntime as ort + + # get the custom op path + ort_custom_op_path = '' + try: + from mmcv.ops import get_onnxruntime_op_path + ort_custom_op_path = get_onnxruntime_op_path() + except (ImportError, ModuleNotFoundError): + warnings.warn('If input model has custom op from mmcv, \ + you may have to build mmcv with ONNXRuntime from source.') + session_options = ort.SessionOptions() + # register custom op for onnxruntime + if osp.exists(ort_custom_op_path): + session_options.register_custom_ops_library(ort_custom_op_path) + sess = ort.InferenceSession(onnx_file, session_options) + providers = ['CPUExecutionProvider'] + options = [{}] + is_cuda_available = ort.get_device() == 'GPU' + if is_cuda_available: + providers.insert(0, 'CUDAExecutionProvider') + options.insert(0, {'device_id': device_id}) + + sess.set_providers(providers, options) + + self.sess = sess + self.device_id = device_id + self.io_binding = sess.io_binding() + self.output_names = [_.name for _ in sess.get_outputs()] + for name in self.output_names: + self.io_binding.bind_output(name) + self.cfg = cfg + self.test_mode = cfg.model.test_cfg.mode + self.is_cuda_available = is_cuda_available + + def extract_feat(self, imgs): + raise NotImplementedError('This method is not implemented.') + + def encode_decode(self, img, img_metas): + raise NotImplementedError('This method is not implemented.') + + def forward_train(self, imgs, img_metas, **kwargs): + raise NotImplementedError('This method is not implemented.') + + def simple_test(self, img: torch.Tensor, img_meta: Iterable, + **kwargs) -> list: + if not self.is_cuda_available: + img = img.detach().cpu() + elif self.device_id >= 0: + img = img.cuda(self.device_id) + device_type = img.device.type + self.io_binding.bind_input( + name='input', + device_type=device_type, + device_id=self.device_id, + element_type=np.float32, + shape=img.shape, + buffer_ptr=img.data_ptr()) + self.sess.run_with_iobinding(self.io_binding) + seg_pred = self.io_binding.copy_outputs_to_cpu()[0] + # whole might support dynamic reshape + ori_shape = img_meta[0]['ori_shape'] + if not (ori_shape[0] == seg_pred.shape[-2] + and ori_shape[1] == seg_pred.shape[-1]): + seg_pred = torch.from_numpy(seg_pred).float() + seg_pred = resize( + seg_pred, size=tuple(ori_shape[:2]), mode='nearest') + seg_pred = seg_pred.long().detach().cpu().numpy() + seg_pred = seg_pred[0] + seg_pred = list(seg_pred) + return seg_pred + + def aug_test(self, imgs, img_metas, **kwargs): + raise NotImplementedError('This method is not implemented.') + + +class TensorRTSegmentor(BaseSegmentor): + + def __init__(self, trt_file: str, cfg: Any, device_id: int): + super(TensorRTSegmentor, self).__init__() + from mmcv.tensorrt import TRTWraper, load_tensorrt_plugin + try: + load_tensorrt_plugin() + except (ImportError, ModuleNotFoundError): + warnings.warn('If input model has custom op from mmcv, \ + you may have to build 
mmcv with TensorRT from source.')
+        model = TRTWraper(
+            trt_file, input_names=['input'], output_names=['output'])
+
+        self.model = model
+        self.device_id = device_id
+        self.cfg = cfg
+        self.test_mode = cfg.model.test_cfg.mode
+
+    def extract_feat(self, imgs):
+        raise NotImplementedError('This method is not implemented.')
+
+    def encode_decode(self, img, img_metas):
+        raise NotImplementedError('This method is not implemented.')
+
+    def forward_train(self, imgs, img_metas, **kwargs):
+        raise NotImplementedError('This method is not implemented.')
+
+    def simple_test(self, img: torch.Tensor, img_meta: Iterable,
+                    **kwargs) -> list:
+        with torch.cuda.device(self.device_id), torch.no_grad():
+            seg_pred = self.model({'input': img})['output']
+        seg_pred = seg_pred.detach().cpu().numpy()
+        # whole might support dynamic reshape
+        ori_shape = img_meta[0]['ori_shape']
+        if not (ori_shape[0] == seg_pred.shape[-2]
+                and ori_shape[1] == seg_pred.shape[-1]):
+            seg_pred = torch.from_numpy(seg_pred).float()
+            seg_pred = resize(
+                seg_pred, size=tuple(ori_shape[:2]), mode='nearest')
+            seg_pred = seg_pred.long().detach().cpu().numpy()
+        seg_pred = seg_pred[0]
+        seg_pred = list(seg_pred)
+        return seg_pred
+
+    def aug_test(self, imgs, img_metas, **kwargs):
+        raise NotImplementedError('This method is not implemented.')
+
+
+def parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser(
+        description='mmseg backend test (and eval)')
+    parser.add_argument('config', help='test config file path')
+    parser.add_argument('model', help='Input model file')
+    parser.add_argument(
+        '--backend',
+        help='Backend of the model.',
+        choices=['onnxruntime', 'tensorrt'])
+    parser.add_argument('--out', help='output result file in pickle format')
+    parser.add_argument(
+        '--format-only',
+        action='store_true',
+        help='Format the output results without performing evaluation. It is '
+        'useful when you want to format the result to a specific format and '
+        'submit it to the test server')
+    parser.add_argument(
+        '--eval',
+        type=str,
+        nargs='+',
+        help='evaluation metrics, which depends on the dataset, e.g., "mIoU"'
+        ' for generic datasets, and "cityscapes" for Cityscapes')
+    parser.add_argument('--show', action='store_true', help='show results')
+    parser.add_argument(
+        '--show-dir', help='directory where painted images will be saved')
+    parser.add_argument(
+        '--options',
+        nargs='+',
+        action=DictAction,
+        help='--options is deprecated in favor of --cfg-options and it will '
+        'not be supported in version v0.22.0. Override some settings in the '
+        'used config, the key-value pair in xxx=yyy format will be merged '
+        'into config file. If the value to be overwritten is a list, it '
+        'should be like key="[a,b]" or key=a,b It also allows nested '
+        'list/tuple values, e.g. key="[(a,b),(c,d)]" Note that the quotation '
+        'marks are necessary and that no white space is allowed.')
+    parser.add_argument(
+        '--cfg-options',
+        nargs='+',
+        action=DictAction,
+        help='override some settings in the used config, the key-value pair '
+        'in xxx=yyy format will be merged into config file. If the value to '
+        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
+        'It also allows nested list/tuple values, e.g.
key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + parser.add_argument( + '--eval-options', + nargs='+', + action=DictAction, + help='custom options for evaluation') + parser.add_argument( + '--opacity', + type=float, + default=0.5, + help='Opacity of painted segmentation map. In (0, 1] range.') + parser.add_argument('--local_rank', type=int, default=0) + args = parser.parse_args() + if 'LOCAL_RANK' not in os.environ: + os.environ['LOCAL_RANK'] = str(args.local_rank) + + if args.options and args.cfg_options: + raise ValueError( + '--options and --cfg-options cannot be both ' + 'specified, --options is deprecated in favor of --cfg-options. ' + '--options will not be supported in version v0.22.0.') + if args.options: + warnings.warn('--options is deprecated in favor of --cfg-options. ' + '--options will not be supported in version v0.22.0.') + args.cfg_options = args.options + + return args + + +def main(): + args = parse_args() + + assert args.out or args.eval or args.format_only or args.show \ + or args.show_dir, \ + ('Please specify at least one operation (save/eval/format/show the ' + 'results / save the results) with the argument "--out", "--eval"' + ', "--format-only", "--show" or "--show-dir"') + + if args.eval and args.format_only: + raise ValueError('--eval and --format_only cannot be both specified') + + if args.out is not None and not args.out.endswith(('.pkl', '.pickle')): + raise ValueError('The output file must be a pkl file.') + + cfg = mmcv.Config.fromfile(args.config) + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + cfg.model.pretrained = None + cfg.data.test.test_mode = True + + # init distributed env first, since logger depends on the dist info. + distributed = False + + # build the dataloader + # TODO: support multiple images per gpu (only minor changes are needed) + dataset = build_dataset(cfg.data.test) + data_loader = build_dataloader( + dataset, + samples_per_gpu=1, + workers_per_gpu=cfg.data.workers_per_gpu, + dist=distributed, + shuffle=False) + + # load onnx config and meta + cfg.model.train_cfg = None + + if args.backend == 'onnxruntime': + model = ONNXRuntimeSegmentor(args.model, cfg=cfg, device_id=0) + elif args.backend == 'tensorrt': + model = TensorRTSegmentor(args.model, cfg=cfg, device_id=0) + + model.CLASSES = dataset.CLASSES + model.PALETTE = dataset.PALETTE + + # clean gpu memory when starting a new evaluation. 
+ torch.cuda.empty_cache() + eval_kwargs = {} if args.eval_options is None else args.eval_options + + # Deprecated + efficient_test = eval_kwargs.get('efficient_test', False) + if efficient_test: + warnings.warn( + '``efficient_test=True`` does not have effect in tools/test.py, ' + 'the evaluation and format results are CPU memory efficient by ' + 'default') + + eval_on_format_results = ( + args.eval is not None and 'cityscapes' in args.eval) + if eval_on_format_results: + assert len(args.eval) == 1, 'eval on format results is not ' \ + 'applicable for metrics other than ' \ + 'cityscapes' + if args.format_only or eval_on_format_results: + if 'imgfile_prefix' in eval_kwargs: + tmpdir = eval_kwargs['imgfile_prefix'] + else: + tmpdir = '.format_cityscapes' + eval_kwargs.setdefault('imgfile_prefix', tmpdir) + mmcv.mkdir_or_exist(tmpdir) + else: + tmpdir = None + + model = MMDataParallel(model, device_ids=[0]) + results = single_gpu_test( + model, + data_loader, + args.show, + args.show_dir, + False, + args.opacity, + pre_eval=args.eval is not None and not eval_on_format_results, + format_only=args.format_only or eval_on_format_results, + format_args=eval_kwargs) + + rank, _ = get_dist_info() + if rank == 0: + if args.out: + warnings.warn( + 'The behavior of ``args.out`` has been changed since MMSeg ' + 'v0.16, the pickled outputs could be seg map as type of ' + 'np.array, pre-eval results or file paths for ' + '``dataset.format_results()``.') + print(f'\nwriting results to {args.out}') + mmcv.dump(results, args.out) + if args.eval: + dataset.evaluate(results, args.eval, **eval_kwargs) + if tmpdir is not None and eval_on_format_results: + # remove tmp dir when cityscapes evaluation + shutil.rmtree(tmpdir) + + +if __name__ == '__main__': + main() + + # Following strings of text style are from colorama package + bright_style, reset_style = '\x1b[1m', '\x1b[0m' + red_text, blue_text = '\x1b[31m', '\x1b[34m' + white_background = '\x1b[107m' + + msg = white_background + bright_style + red_text + msg += 'DeprecationWarning: This tool will be deprecated in future. 
' + msg += blue_text + 'Welcome to use the unified model deployment toolbox ' + msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy' + msg += reset_style + warnings.warn(msg) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/dist_test.sh b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/dist_test.sh new file mode 100644 index 0000000..89711fd --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/dist_test.sh @@ -0,0 +1,20 @@ +CONFIG=$1 +CHECKPOINT=$2 +GPUS=$3 +NNODES=${NNODES:-1} +NODE_RANK=${NODE_RANK:-0} +PORT=${PORT:-29500} +MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} + +PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ +python -m torch.distributed.launch \ + --nnodes=$NNODES \ + --node_rank=$NODE_RANK \ + --master_addr=$MASTER_ADDR \ + --nproc_per_node=$GPUS \ + --master_port=$PORT \ + $(dirname "$0")/test.py \ + $CONFIG \ + $CHECKPOINT \ + --launcher pytorch \ + ${@:4} diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/dist_train.sh b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/dist_train.sh new file mode 100644 index 0000000..a857df7 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/dist_train.sh @@ -0,0 +1,17 @@ +CONFIG=$1 +GPUS=$2 +NNODES=${NNODES:-1} +NODE_RANK=${NODE_RANK:-0} +PORT=${PORT:-29500} +MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} + +PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ +python -m torch.distributed.launch \ + --nnodes=$NNODES \ + --node_rank=$NODE_RANK \ + --master_addr=$MASTER_ADDR \ + --nproc_per_node=$GPUS \ + --master_port=$PORT \ + $(dirname "$0")/train.py \ + $CONFIG \ + --launcher pytorch ${@:3} diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/get_flops.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/get_flops.py new file mode 100644 index 0000000..e30c36f --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/get_flops.py @@ -0,0 +1,60 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse + +from mmcv import Config +from mmcv.cnn import get_model_complexity_info + +from mmseg.models import build_segmentor + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Get the FLOPs of a segmentor') + parser.add_argument('config', help='train config file path') + parser.add_argument( + '--shape', + type=int, + nargs='+', + default=[2048, 1024], + help='input image size') + args = parser.parse_args() + return args + + +def main(): + + args = parse_args() + + if len(args.shape) == 1: + input_shape = (3, args.shape[0], args.shape[0]) + elif len(args.shape) == 2: + input_shape = (3, ) + tuple(args.shape) + else: + raise ValueError('invalid input shape') + + cfg = Config.fromfile(args.config) + cfg.model.pretrained = None + model = build_segmentor( + cfg.model, + train_cfg=cfg.get('train_cfg'), + test_cfg=cfg.get('test_cfg')).cuda() + model.eval() + + if hasattr(model, 'forward_dummy'): + model.forward = model.forward_dummy + else: + raise NotImplementedError( + 'FLOPs counter is currently not supported with {}'. + format(model.__class__.__name__)) + + flops, params = get_model_complexity_info(model, input_shape) + split_line = '=' * 30 + print('{0}\nInput shape: {1}\nFlops: {2}\nParams: {3}\n{0}'.format( + split_line, input_shape, flops, params)) + print('!!!Please be cautious if you use the results in papers.
' + 'You may need to check if all ops are supported and verify that the ' + 'flops computation is correct.') + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/model_converters/beit2mmseg.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/model_converters/beit2mmseg.py new file mode 100644 index 0000000..91b91fa --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/model_converters/beit2mmseg.py @@ -0,0 +1,56 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os.path as osp +from collections import OrderedDict + +import mmcv +import torch +from mmcv.runner import CheckpointLoader + + +def convert_beit(ckpt): + new_ckpt = OrderedDict() + + for k, v in ckpt.items(): + if k.startswith('blocks'): + new_key = k.replace('blocks', 'layers') + if 'norm' in new_key: + new_key = new_key.replace('norm', 'ln') + elif 'mlp.fc1' in new_key: + new_key = new_key.replace('mlp.fc1', 'ffn.layers.0.0') + elif 'mlp.fc2' in new_key: + new_key = new_key.replace('mlp.fc2', 'ffn.layers.1') + new_ckpt[new_key] = v + elif k.startswith('patch_embed'): + new_key = k.replace('patch_embed.proj', 'patch_embed.projection') + new_ckpt[new_key] = v + else: + new_key = k + new_ckpt[new_key] = v + + return new_ckpt + + +def main(): + parser = argparse.ArgumentParser( + description='Convert keys in official pretrained beit models to' + 'MMSegmentation style.') + parser.add_argument('src', help='src model path or url') + # The dst path must be a full path of the new checkpoint. + parser.add_argument('dst', help='save path') + args = parser.parse_args() + + checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + elif 'model' in checkpoint: + state_dict = checkpoint['model'] + else: + state_dict = checkpoint + weight = convert_beit(state_dict) + mmcv.mkdir_or_exist(osp.dirname(args.dst)) + torch.save(weight, args.dst) + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/model_converters/mit2mmseg.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/model_converters/mit2mmseg.py new file mode 100644 index 0000000..2eff1f7 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/model_converters/mit2mmseg.py @@ -0,0 +1,82 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os.path as osp +from collections import OrderedDict + +import mmcv +import torch +from mmcv.runner import CheckpointLoader + + +def convert_mit(ckpt): + new_ckpt = OrderedDict() + # Process the concat between q linear weights and kv linear weights + for k, v in ckpt.items(): + if k.startswith('head'): + continue + # patch embedding conversion + elif k.startswith('patch_embed'): + stage_i = int(k.split('.')[0].replace('patch_embed', '')) + new_k = k.replace(f'patch_embed{stage_i}', f'layers.{stage_i-1}.0') + new_v = v + if 'proj.' in new_k: + new_k = new_k.replace('proj.', 'projection.') + # transformer encoder layer conversion + elif k.startswith('block'): + stage_i = int(k.split('.')[0].replace('block', '')) + new_k = k.replace(f'block{stage_i}', f'layers.{stage_i-1}.1') + new_v = v + if 'attn.q.' in new_k: + sub_item_k = k.replace('q.', 'kv.') + new_k = new_k.replace('q.', 'attn.in_proj_') + new_v = torch.cat([v, ckpt[sub_item_k]], dim=0) + elif 'attn.kv.' in new_k: + continue + elif 'attn.proj.' 
in new_k: + new_k = new_k.replace('proj.', 'attn.out_proj.') + elif 'attn.sr.' in new_k: + new_k = new_k.replace('sr.', 'sr.') + elif 'mlp.' in new_k: + string = f'{new_k}-' + new_k = new_k.replace('mlp.', 'ffn.layers.') + if 'fc1.weight' in new_k or 'fc2.weight' in new_k: + new_v = v.reshape((*v.shape, 1, 1)) + new_k = new_k.replace('fc1.', '0.') + new_k = new_k.replace('dwconv.dwconv.', '1.') + new_k = new_k.replace('fc2.', '4.') + string += f'{new_k} {v.shape}-{new_v.shape}' + # norm layer conversion + elif k.startswith('norm'): + stage_i = int(k.split('.')[0].replace('norm', '')) + new_k = k.replace(f'norm{stage_i}', f'layers.{stage_i-1}.2') + new_v = v + else: + new_k = k + new_v = v + new_ckpt[new_k] = new_v + return new_ckpt + + +def main(): + parser = argparse.ArgumentParser( + description='Convert keys in official pretrained segformer to ' + 'MMSegmentation style.') + parser.add_argument('src', help='src model path or url') + # The dst path must be a full path of the new checkpoint. + parser.add_argument('dst', help='save path') + args = parser.parse_args() + + checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + elif 'model' in checkpoint: + state_dict = checkpoint['model'] + else: + state_dict = checkpoint + weight = convert_mit(state_dict) + mmcv.mkdir_or_exist(osp.dirname(args.dst)) + torch.save(weight, args.dst) + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/model_converters/stdc2mmseg.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/model_converters/stdc2mmseg.py new file mode 100644 index 0000000..9241f86 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/model_converters/stdc2mmseg.py @@ -0,0 +1,71 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os.path as osp + +import mmcv +import torch +from mmcv.runner import CheckpointLoader + + +def convert_stdc(ckpt, stdc_type): + new_state_dict = {} + if stdc_type == 'STDC1': + stage_lst = ['0', '1', '2.0', '2.1', '3.0', '3.1', '4.0', '4.1'] + else: + stage_lst = [ + '0', '1', '2.0', '2.1', '2.2', '2.3', '3.0', '3.1', '3.2', '3.3', + '3.4', '4.0', '4.1', '4.2' + ] + for k, v in ckpt.items(): + ori_k = k + flag = False + if 'cp.' in k: + k = k.replace('cp.', '') + if 'features.' in k: + num_layer = int(k.split('.')[1]) + feature_key_lst = 'features.' + str(num_layer) + '.' + stages_key_lst = 'stages.' + stage_lst[num_layer] + '.' + k = k.replace(feature_key_lst, stages_key_lst) + flag = True + if 'conv_list' in k: + k = k.replace('conv_list', 'layers') + flag = True + if 'avd_layer.' in k: + if 'avd_layer.0' in k: + k = k.replace('avd_layer.0', 'downsample.conv') + elif 'avd_layer.1' in k: + k = k.replace('avd_layer.1', 'downsample.bn') + flag = True + if flag: + new_state_dict[k] = ckpt[ori_k] + + return new_state_dict + + +def main(): + parser = argparse.ArgumentParser( + description='Convert keys in official pretrained STDC1/2 to ' + 'MMSegmentation style.') + parser.add_argument('src', help='src model path') + # The dst path must be a full path of the new checkpoint. 
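# The model_converters scripts above and below share one shape: load a
# checkpoint, remap its keys, save the result. A toy sketch of the remap
# step in the style of convert_beit(); the keys are illustrative, not taken
# from a real BEiT checkpoint:
from collections import OrderedDict

toy_ckpt = OrderedDict([('blocks.0.norm1.weight', 0.1),
                        ('patch_embed.proj.bias', 0.2)])
remapped = OrderedDict()
for key, value in toy_ckpt.items():
    new_key = key.replace('blocks', 'layers').replace('norm', 'ln')
    new_key = new_key.replace('patch_embed.proj', 'patch_embed.projection')
    remapped[new_key] = value
# remapped == {'layers.0.ln1.weight': 0.1, 'patch_embed.projection.bias': 0.2}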
+ parser.add_argument('dst', help='save path') + parser.add_argument('type', help='model type: STDC1 or STDC2') + args = parser.parse_args() + + checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + elif 'model' in checkpoint: + state_dict = checkpoint['model'] + else: + state_dict = checkpoint + + assert args.type in ['STDC1', + 'STDC2'], 'STDC type should be STDC1 or STDC2!' + weight = convert_stdc(state_dict, args.type) + mmcv.mkdir_or_exist(osp.dirname(args.dst)) + torch.save(weight, args.dst) + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/model_converters/swin2mmseg.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/model_converters/swin2mmseg.py new file mode 100644 index 0000000..03b24ce --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/model_converters/swin2mmseg.py @@ -0,0 +1,87 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os.path as osp +from collections import OrderedDict + +import mmcv +import torch +from mmcv.runner import CheckpointLoader + + +def convert_swin(ckpt): + new_ckpt = OrderedDict() + + def correct_unfold_reduction_order(x): + out_channel, in_channel = x.shape + x = x.reshape(out_channel, 4, in_channel // 4) + x = x[:, [0, 2, 1, 3], :].transpose(1, + 2).reshape(out_channel, in_channel) + return x + + def correct_unfold_norm_order(x): + in_channel = x.shape[0] + x = x.reshape(4, in_channel // 4) + x = x[[0, 2, 1, 3], :].transpose(0, 1).reshape(in_channel) + return x + + for k, v in ckpt.items(): + if k.startswith('head'): + continue + elif k.startswith('layers'): + new_v = v + if 'attn.' in k: + new_k = k.replace('attn.', 'attn.w_msa.') + elif 'mlp.' in k: + if 'mlp.fc1.' in k: + new_k = k.replace('mlp.fc1.', 'ffn.layers.0.0.') + elif 'mlp.fc2.' in k: + new_k = k.replace('mlp.fc2.', 'ffn.layers.1.') + else: + new_k = k.replace('mlp.', 'ffn.') + elif 'downsample' in k: + new_k = k + if 'reduction.' in k: + new_v = correct_unfold_reduction_order(v) + elif 'norm.' in k: + new_v = correct_unfold_norm_order(v) + else: + new_k = k + new_k = new_k.replace('layers', 'stages', 1) + elif k.startswith('patch_embed'): + new_v = v + if 'proj' in k: + new_k = k.replace('proj', 'projection') + else: + new_k = k + else: + new_v = v + new_k = k + + new_ckpt[new_k] = new_v + + return new_ckpt + + +def main(): + parser = argparse.ArgumentParser( + description='Convert keys in official pretrained swin models to ' + 'MMSegmentation style.') + parser.add_argument('src', help='src model path or url') + # The dst path must be a full path of the new checkpoint.
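# Note on correct_unfold_reduction_order()/correct_unfold_norm_order() in
# swin2mmseg.py above: official Swin and mmseg's PatchMerging concatenate
# the four 2x2 patch-merging sub-grids in different channel orders, so the
# channel blocks of downsample weights are permuted with [0, 2, 1, 3] and
# re-interleaved. Toy illustration of the norm-weight case (in_channel = 8):
import torch

w = torch.arange(8.)                 # stands in for a downsample.norm weight
x = w.reshape(4, 2)                  # four groups of in_channel // 4
x = x[[0, 2, 1, 3], :].transpose(0, 1).reshape(8)
# -> tensor([0., 4., 2., 6., 1., 5., 3., 7.]), the exact reordering
#    correct_unfold_norm_order() applies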
+ parser.add_argument('dst', help='save path') + args = parser.parse_args() + + checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + elif 'model' in checkpoint: + state_dict = checkpoint['model'] + else: + state_dict = checkpoint + weight = convert_swin(state_dict) + mmcv.mkdir_or_exist(osp.dirname(args.dst)) + torch.save(weight, args.dst) + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/model_converters/twins2mmseg.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/model_converters/twins2mmseg.py new file mode 100644 index 0000000..ab64aa5 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/model_converters/twins2mmseg.py @@ -0,0 +1,87 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os.path as osp +from collections import OrderedDict + +import mmcv +import torch +from mmcv.runner import CheckpointLoader + + +def convert_twins(args, ckpt): + + new_ckpt = OrderedDict() + + for k, v in list(ckpt.items()): + new_v = v + if k.startswith('head'): + continue + elif k.startswith('patch_embeds'): + if 'proj.' in k: + new_k = k.replace('proj.', 'projection.') + else: + new_k = k + elif k.startswith('blocks'): + # Union + if 'attn.q.' in k: + new_k = k.replace('q.', 'attn.in_proj_') + new_v = torch.cat([v, ckpt[k.replace('attn.q.', 'attn.kv.')]], + dim=0) + elif 'mlp.fc1' in k: + new_k = k.replace('mlp.fc1', 'ffn.layers.0.0') + elif 'mlp.fc2' in k: + new_k = k.replace('mlp.fc2', 'ffn.layers.1') + # Only pcpvt + elif args.model == 'pcpvt': + if 'attn.proj.' in k: + new_k = k.replace('proj.', 'attn.out_proj.') + else: + new_k = k + + # Only svt + else: + if 'attn.proj.' in k: + k_lst = k.split('.') + if int(k_lst[2]) % 2 == 1: + new_k = k.replace('proj.', 'attn.out_proj.') + else: + new_k = k + else: + new_k = k + new_k = new_k.replace('blocks.', 'layers.') + elif k.startswith('pos_block'): + new_k = k.replace('pos_block', 'position_encodings') + if 'proj.0.' in new_k: + new_k = new_k.replace('proj.0.', 'proj.') + else: + new_k = k + if 'attn.kv.' not in k: + new_ckpt[new_k] = new_v + return new_ckpt + + +def main(): + parser = argparse.ArgumentParser( + description='Convert keys in timm pretrained vit models to ' + 'MMSegmentation style.') + parser.add_argument('src', help='src model path or url') + # The dst path must be a full path of the new checkpoint. + parser.add_argument('dst', help='save path') + parser.add_argument('model', help='model: pcpvt or svt') + args = parser.parse_args() + + checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') + + if 'state_dict' in checkpoint: + # timm checkpoint + state_dict = checkpoint['state_dict'] + else: + state_dict = checkpoint + + weight = convert_twins(args, state_dict) + mmcv.mkdir_or_exist(osp.dirname(args.dst)) + torch.save(weight, args.dst) + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/model_converters/vit2mmseg.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/model_converters/vit2mmseg.py new file mode 100644 index 0000000..bc18ebe --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/model_converters/vit2mmseg.py @@ -0,0 +1,70 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
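# Both mit2mmseg.py and twins2mmseg.py above fold separate q and kv
# projection weights into the single in_proj_* tensor that
# torch.nn.MultiheadAttention expects, with q, k and v stacked along dim 0.
# A small shape check of that step:
import torch

embed_dim = 4
q_weight = torch.randn(embed_dim, embed_dim)       # 'attn.q.weight'
kv_weight = torch.randn(2 * embed_dim, embed_dim)  # 'attn.kv.weight'
in_proj_weight = torch.cat([q_weight, kv_weight], dim=0)
assert in_proj_weight.shape == (3 * embed_dim, embed_dim)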
+import argparse +import os.path as osp +from collections import OrderedDict + +import mmcv +import torch +from mmcv.runner import CheckpointLoader + + +def convert_vit(ckpt): + + new_ckpt = OrderedDict() + + for k, v in ckpt.items(): + if k.startswith('head'): + continue + if k.startswith('norm'): + new_k = k.replace('norm.', 'ln1.') + elif k.startswith('patch_embed'): + if 'proj' in k: + new_k = k.replace('proj', 'projection') + else: + new_k = k + elif k.startswith('blocks'): + if 'norm' in k: + new_k = k.replace('norm', 'ln') + elif 'mlp.fc1' in k: + new_k = k.replace('mlp.fc1', 'ffn.layers.0.0') + elif 'mlp.fc2' in k: + new_k = k.replace('mlp.fc2', 'ffn.layers.1') + elif 'attn.qkv' in k: + new_k = k.replace('attn.qkv.', 'attn.attn.in_proj_') + elif 'attn.proj' in k: + new_k = k.replace('attn.proj', 'attn.attn.out_proj') + else: + new_k = k + new_k = new_k.replace('blocks.', 'layers.') + else: + new_k = k + new_ckpt[new_k] = v + + return new_ckpt + + +def main(): + parser = argparse.ArgumentParser( + description='Convert keys in timm pretrained vit models to ' + 'MMSegmentation style.') + parser.add_argument('src', help='src model path or url') + # The dst path must be a full path of the new checkpoint. + parser.add_argument('dst', help='save path') + args = parser.parse_args() + + checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') + if 'state_dict' in checkpoint: + # timm checkpoint + state_dict = checkpoint['state_dict'] + elif 'model' in checkpoint: + # deit checkpoint + state_dict = checkpoint['model'] + else: + state_dict = checkpoint + weight = convert_vit(state_dict) + mmcv.mkdir_or_exist(osp.dirname(args.dst)) + torch.save(weight, args.dst) + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/model_converters/vitjax2mmseg.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/model_converters/vitjax2mmseg.py new file mode 100644 index 0000000..585f408 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/model_converters/vitjax2mmseg.py @@ -0,0 +1,123 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
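# Typical invocation of vit2mmseg.py above, with a quick sanity check on
# the result; both paths are placeholders:
#
#   python tools/model_converters/vit2mmseg.py \
#       vit_base_patch16_224.pth pretrain/vit_base_p16_mmseg.pth
#
import torch

state = torch.load('pretrain/vit_base_p16_mmseg.pth', map_location='cpu')
# heads are dropped and timm names rewritten, so keys should now look like
# 'layers.0.ln1.weight' or 'patch_embed.projection.bias'
assert not any(k.startswith(('head', 'blocks')) for k in state)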
+import argparse +import os.path as osp + +import mmcv +import numpy as np +import torch + + +def vit_jax_to_torch(jax_weights, num_layer=12): + torch_weights = dict() + + # patch embedding + conv_filters = jax_weights['embedding/kernel'] + conv_filters = conv_filters.permute(3, 2, 0, 1) + torch_weights['patch_embed.projection.weight'] = conv_filters + torch_weights['patch_embed.projection.bias'] = jax_weights[ + 'embedding/bias'] + + # pos embedding + torch_weights['pos_embed'] = jax_weights[ + 'Transformer/posembed_input/pos_embedding'] + + # cls token + torch_weights['cls_token'] = jax_weights['cls'] + + # head + torch_weights['ln1.weight'] = jax_weights['Transformer/encoder_norm/scale'] + torch_weights['ln1.bias'] = jax_weights['Transformer/encoder_norm/bias'] + + # transformer blocks + for i in range(num_layer): + jax_block = f'Transformer/encoderblock_{i}' + torch_block = f'layers.{i}' + + # attention norm + torch_weights[f'{torch_block}.ln1.weight'] = jax_weights[ + f'{jax_block}/LayerNorm_0/scale'] + torch_weights[f'{torch_block}.ln1.bias'] = jax_weights[ + f'{jax_block}/LayerNorm_0/bias'] + + # attention + query_weight = jax_weights[ + f'{jax_block}/MultiHeadDotProductAttention_1/query/kernel'] + query_bias = jax_weights[ + f'{jax_block}/MultiHeadDotProductAttention_1/query/bias'] + key_weight = jax_weights[ + f'{jax_block}/MultiHeadDotProductAttention_1/key/kernel'] + key_bias = jax_weights[ + f'{jax_block}/MultiHeadDotProductAttention_1/key/bias'] + value_weight = jax_weights[ + f'{jax_block}/MultiHeadDotProductAttention_1/value/kernel'] + value_bias = jax_weights[ + f'{jax_block}/MultiHeadDotProductAttention_1/value/bias'] + + qkv_weight = torch.from_numpy( + np.stack((query_weight, key_weight, value_weight), 1)) + qkv_weight = torch.flatten(qkv_weight, start_dim=1) + qkv_bias = torch.from_numpy( + np.stack((query_bias, key_bias, value_bias), 0)) + qkv_bias = torch.flatten(qkv_bias, start_dim=0) + + torch_weights[f'{torch_block}.attn.attn.in_proj_weight'] = qkv_weight + torch_weights[f'{torch_block}.attn.attn.in_proj_bias'] = qkv_bias + to_out_weight = jax_weights[ + f'{jax_block}/MultiHeadDotProductAttention_1/out/kernel'] + to_out_weight = torch.flatten(to_out_weight, start_dim=0, end_dim=1) + torch_weights[ + f'{torch_block}.attn.attn.out_proj.weight'] = to_out_weight + torch_weights[f'{torch_block}.attn.attn.out_proj.bias'] = jax_weights[ + f'{jax_block}/MultiHeadDotProductAttention_1/out/bias'] + + # mlp norm + torch_weights[f'{torch_block}.ln2.weight'] = jax_weights[ + f'{jax_block}/LayerNorm_2/scale'] + torch_weights[f'{torch_block}.ln2.bias'] = jax_weights[ + f'{jax_block}/LayerNorm_2/bias'] + + # mlp + torch_weights[f'{torch_block}.ffn.layers.0.0.weight'] = jax_weights[ + f'{jax_block}/MlpBlock_3/Dense_0/kernel'] + torch_weights[f'{torch_block}.ffn.layers.0.0.bias'] = jax_weights[ + f'{jax_block}/MlpBlock_3/Dense_0/bias'] + torch_weights[f'{torch_block}.ffn.layers.1.weight'] = jax_weights[ + f'{jax_block}/MlpBlock_3/Dense_1/kernel'] + torch_weights[f'{torch_block}.ffn.layers.1.bias'] = jax_weights[ + f'{jax_block}/MlpBlock_3/Dense_1/bias'] + + # transpose weights + for k, v in torch_weights.items(): + if 'weight' in k and 'patch_embed' not in k and 'ln' not in k: + v = v.permute(1, 0) + torch_weights[k] = v + + return torch_weights + + +def main(): + # stole refactoring code from Robin Strudel, thanks + parser = argparse.ArgumentParser( + description='Convert keys from jax official pretrained vit models to ' + 'MMSegmentation style.') + parser.add_argument('src', 
help='src model path or url') + # The dst path must be a full path of the new checkpoint. + parser.add_argument('dst', help='save path') + args = parser.parse_args() + + jax_weights = np.load(args.src) + jax_weights_tensor = {} + for key in jax_weights.files: + value = torch.from_numpy(jax_weights[key]) + jax_weights_tensor[key] = value + if 'L_16-i21k' in args.src: + num_layer = 24 + else: + num_layer = 12 + torch_weights = vit_jax_to_torch(jax_weights_tensor, num_layer) + mmcv.mkdir_or_exist(osp.dirname(args.dst)) + torch.save(torch_weights, args.dst) + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/onnx2tensorrt.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/onnx2tensorrt.py new file mode 100644 index 0000000..0f60dce --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/onnx2tensorrt.py @@ -0,0 +1,289 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os +import os.path as osp +import warnings +from typing import Iterable, Optional, Union + +import matplotlib.pyplot as plt +import mmcv +import numpy as np +import onnxruntime as ort +import torch +from mmcv.ops import get_onnxruntime_op_path +from mmcv.tensorrt import (TRTWraper, is_tensorrt_plugin_loaded, onnx2trt, + save_trt_engine) + +from mmseg.apis.inference import LoadImage +from mmseg.datasets import DATASETS +from mmseg.datasets.pipelines import Compose + + +def get_GiB(x: int): + """return x GiB.""" + return x * (1 << 30) + + +def _prepare_input_img(img_path: str, + test_pipeline: Iterable[dict], + shape: Optional[Iterable] = None, + rescale_shape: Optional[Iterable] = None) -> dict: + # build the data pipeline + if shape is not None: + test_pipeline[1]['img_scale'] = (shape[1], shape[0]) + test_pipeline[1]['transforms'][0]['keep_ratio'] = False + test_pipeline = [LoadImage()] + test_pipeline[1:] + test_pipeline = Compose(test_pipeline) + # prepare data + data = dict(img=img_path) + data = test_pipeline(data) + imgs = data['img'] + img_metas = [i.data for i in data['img_metas']] + + if rescale_shape is not None: + for img_meta in img_metas: + img_meta['ori_shape'] = tuple(rescale_shape) + (3, ) + + mm_inputs = {'imgs': imgs, 'img_metas': img_metas} + + return mm_inputs + + +def _update_input_img(img_list: Iterable, img_meta_list: Iterable): + # update img and its meta list + N = img_list[0].size(0) + img_meta = img_meta_list[0][0] + img_shape = img_meta['img_shape'] + ori_shape = img_meta['ori_shape'] + pad_shape = img_meta['pad_shape'] + new_img_meta_list = [[{ + 'img_shape': + img_shape, + 'ori_shape': + ori_shape, + 'pad_shape': + pad_shape, + 'filename': + img_meta['filename'], + 'scale_factor': + (img_shape[1] / ori_shape[1], img_shape[0] / ori_shape[0]) * 2, + 'flip': + False, + } for _ in range(N)]] + + return img_list, new_img_meta_list + + +def show_result_pyplot(img: Union[str, np.ndarray], + result: np.ndarray, + palette: Optional[Iterable] = None, + fig_size: Iterable[int] = (15, 10), + opacity: float = 0.5, + title: str = '', + block: bool = True): + img = mmcv.imread(img) + img = img.copy() + seg = result[0] + seg = mmcv.imresize(seg, img.shape[:2][::-1]) + palette = np.array(palette) + assert palette.shape[1] == 3 + assert len(palette.shape) == 2 + assert 0 < opacity <= 1.0 + color_seg = np.zeros((seg.shape[0], seg.shape[1], 3), dtype=np.uint8) + for label, color in enumerate(palette): + color_seg[seg == label, :] = color + # convert to BGR + color_seg = color_seg[..., ::-1] + + img = img * (1 - 
opacity) + color_seg * opacity + img = img.astype(np.uint8) + + plt.figure(figsize=fig_size) + plt.imshow(mmcv.bgr2rgb(img)) + plt.title(title) + plt.tight_layout() + plt.show(block=block) + + +def onnx2tensorrt(onnx_file: str, + trt_file: str, + config: dict, + input_config: dict, + fp16: bool = False, + verify: bool = False, + show: bool = False, + dataset: str = 'CityscapesDataset', + workspace_size: int = 1, + verbose: bool = False): + import tensorrt as trt + min_shape = input_config['min_shape'] + max_shape = input_config['max_shape'] + # create trt engine and wrapper + opt_shape_dict = {'input': [min_shape, min_shape, max_shape]} + max_workspace_size = get_GiB(workspace_size) + trt_engine = onnx2trt( + onnx_file, + opt_shape_dict, + log_level=trt.Logger.VERBOSE if verbose else trt.Logger.ERROR, + fp16_mode=fp16, + max_workspace_size=max_workspace_size) + save_dir, _ = osp.split(trt_file) + if save_dir: + os.makedirs(save_dir, exist_ok=True) + save_trt_engine(trt_engine, trt_file) + print(f'Successfully created TensorRT engine: {trt_file}') + + if verify: + inputs = _prepare_input_img( + input_config['input_path'], + config.data.test.pipeline, + shape=min_shape[2:]) + + imgs = inputs['imgs'] + img_metas = inputs['img_metas'] + img_list = [img[None, :] for img in imgs] + img_meta_list = [[img_meta] for img_meta in img_metas] + # update img_meta + img_list, img_meta_list = _update_input_img(img_list, img_meta_list) + + if max_shape[0] > 1: + # concate flip image for batch test + flip_img_list = [_.flip(-1) for _ in img_list] + img_list = [ + torch.cat((ori_img, flip_img), 0) + for ori_img, flip_img in zip(img_list, flip_img_list) + ] + + # Get results from ONNXRuntime + ort_custom_op_path = get_onnxruntime_op_path() + session_options = ort.SessionOptions() + if osp.exists(ort_custom_op_path): + session_options.register_custom_ops_library(ort_custom_op_path) + sess = ort.InferenceSession(onnx_file, session_options) + sess.set_providers(['CPUExecutionProvider'], [{}]) # use cpu mode + onnx_output = sess.run(['output'], + {'input': img_list[0].detach().numpy()})[0][0] + + # Get results from TensorRT + trt_model = TRTWraper(trt_file, ['input'], ['output']) + with torch.no_grad(): + trt_outputs = trt_model({'input': img_list[0].contiguous().cuda()}) + trt_output = trt_outputs['output'][0].cpu().detach().numpy() + + if show: + dataset = DATASETS.get(dataset) + assert dataset is not None + palette = dataset.PALETTE + + show_result_pyplot( + input_config['input_path'], + (onnx_output[0].astype(np.uint8), ), + palette=palette, + title='ONNXRuntime', + block=False) + show_result_pyplot( + input_config['input_path'], (trt_output[0].astype(np.uint8), ), + palette=palette, + title='TensorRT') + + np.testing.assert_allclose( + onnx_output, trt_output, rtol=1e-03, atol=1e-05) + print('TensorRT and ONNXRuntime output all close.') + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert MMSegmentation models from ONNX to TensorRT') + parser.add_argument('config', help='Config file of the model') + parser.add_argument('model', help='Path to the input ONNX model') + parser.add_argument( + '--trt-file', type=str, help='Path to the output TensorRT engine') + parser.add_argument( + '--max-shape', + type=int, + nargs=4, + default=[1, 3, 400, 600], + help='Maximum shape of model input.') + parser.add_argument( + '--min-shape', + type=int, + nargs=4, + default=[1, 3, 400, 600], + help='Minimum shape of model input.') + parser.add_argument('--fp16', action='store_true', help='Enable 
fp16 mode') + parser.add_argument( + '--workspace-size', + type=int, + default=1, + help='Max workspace size in GiB') + parser.add_argument( + '--input-img', type=str, default='', help='Image for test') + parser.add_argument( + '--show', action='store_true', help='Whether to show output results') + parser.add_argument( + '--dataset', + type=str, + default='CityscapesDataset', + help='Dataset name') + parser.add_argument( + '--verify', + action='store_true', + help='Verify the outputs of ONNXRuntime and TensorRT') + parser.add_argument( + '--verbose', + action='store_true', + help='Whether to output verbose logging messages while creating \ + TensorRT engine.') + args = parser.parse_args() + return args + + +if __name__ == '__main__': + + assert is_tensorrt_plugin_loaded(), 'TensorRT plugin should be compiled.' + args = parse_args() + + if not args.input_img: + args.input_img = osp.join(osp.dirname(__file__), '../demo/demo.png') + + # check arguments + assert osp.exists(args.config), 'Config {} not found.'.format(args.config) + assert osp.exists(args.model), \ + 'ONNX model {} not found.'.format(args.model) + assert args.workspace_size >= 0, 'Workspace size less than 0.' + assert DATASETS.get(args.dataset) is not None, \ + 'Dataset {} not found.'.format(args.dataset) + for max_value, min_value in zip(args.max_shape, args.min_shape): + assert max_value >= min_value, \ + 'max_shape should be no smaller than min_shape' + + input_config = { + 'min_shape': args.min_shape, + 'max_shape': args.max_shape, + 'input_path': args.input_img + } + + cfg = mmcv.Config.fromfile(args.config) + onnx2tensorrt( + args.model, + args.trt_file, + cfg, + input_config, + fp16=args.fp16, + verify=args.verify, + show=args.show, + dataset=args.dataset, + workspace_size=args.workspace_size, + verbose=args.verbose) + + # Following strings of text style are from colorama package + bright_style, reset_style = '\x1b[1m', '\x1b[0m' + red_text, blue_text = '\x1b[31m', '\x1b[34m' + white_background = '\x1b[107m' + + msg = white_background + bright_style + red_text + msg += 'DeprecationWarning: This tool will be deprecated in future. ' + msg += blue_text + 'Welcome to use the unified model deployment toolbox ' + msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy' + msg += reset_style + warnings.warn(msg) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/print_config.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/print_config.py new file mode 100644 index 0000000..3f9c08d --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/print_config.py @@ -0,0 +1,69 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import warnings + +from mmcv import Config, DictAction + +from mmseg.apis import init_segmentor + + +def parse_args(): + parser = argparse.ArgumentParser(description='Print the whole config') + parser.add_argument('config', help='config file path') + parser.add_argument( + '--graph', action='store_true', help='print the models graph') + parser.add_argument( + '--options', + nargs='+', + action=DictAction, + help="--options is deprecated in favor of --cfg-options and it will " + 'not be supported in version v0.22.0. Override some settings in the ' + 'used config, the key-value pair in xxx=yyy format will be merged ' + 'into config file. If the value to be overwritten is a list, it ' + 'should be like key="[a,b]" or key=a,b It also allows nested ' + 'list/tuple values, e.g.
key="[(a,b),(c,d)]" Note that the quotation ' + 'marks are necessary and that no white space is allowed.') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + args = parser.parse_args() + + if args.options and args.cfg_options: + raise ValueError( + '--options and --cfg-options cannot be both ' + 'specified, --options is deprecated in favor of --cfg-options. ' + '--options will not be supported in version v0.22.0.') + if args.options: + warnings.warn('--options is deprecated in favor of --cfg-options, ' + '--options will not be supported in version v0.22.0.') + args.cfg_options = args.options + + return args + + +def main(): + args = parse_args() + + cfg = Config.fromfile(args.config) + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + print(f'Config:\n{cfg.pretty_text}') + # dump config + cfg.dump('example.py') + # dump models graph + if args.graph: + model = init_segmentor(args.config, device='cpu') + print(f'Model graph:\n{str(model)}') + with open('example-graph.txt', 'w') as f: + f.writelines(str(model)) + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/publish_model.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/publish_model.py new file mode 100644 index 0000000..e266057 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/publish_model.py @@ -0,0 +1,36 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import subprocess + +import torch + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Process a checkpoint to be published') + parser.add_argument('in_file', help='input checkpoint filename') + parser.add_argument('out_file', help='output checkpoint filename') + args = parser.parse_args() + return args + + +def process_checkpoint(in_file, out_file): + checkpoint = torch.load(in_file, map_location='cpu') + # remove optimizer for smaller file size + if 'optimizer' in checkpoint: + del checkpoint['optimizer'] + # if it is necessary to remove some sensitive data in checkpoint['meta'], + # add the code here. + torch.save(checkpoint, out_file) + sha = subprocess.check_output(['sha256sum', out_file]).decode() + final_file = out_file.rstrip('.pth') + '-{}.pth'.format(sha[:8]) + subprocess.Popen(['mv', out_file, final_file]) + + +def main(): + args = parse_args() + process_checkpoint(args.in_file, args.out_file) + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/pytorch2onnx.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/pytorch2onnx.py new file mode 100644 index 0000000..060d187 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/pytorch2onnx.py @@ -0,0 +1,405 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import argparse +import warnings +from functools import partial + +import mmcv +import numpy as np +import onnxruntime as rt +import torch +import torch._C +import torch.serialization +from mmcv import DictAction +from mmcv.onnx import register_extra_symbolics +from mmcv.runner import load_checkpoint +from torch import nn + +from mmseg.apis import show_result_pyplot +from mmseg.apis.inference import LoadImage +from mmseg.datasets.pipelines import Compose +from mmseg.models import build_segmentor +from mmseg.ops import resize + +torch.manual_seed(3) + + +def _convert_batchnorm(module): + module_output = module + if isinstance(module, torch.nn.SyncBatchNorm): + module_output = torch.nn.BatchNorm2d(module.num_features, module.eps, + module.momentum, module.affine, + module.track_running_stats) + if module.affine: + module_output.weight.data = module.weight.data.clone().detach() + module_output.bias.data = module.bias.data.clone().detach() + # keep requires_grad unchanged + module_output.weight.requires_grad = module.weight.requires_grad + module_output.bias.requires_grad = module.bias.requires_grad + module_output.running_mean = module.running_mean + module_output.running_var = module.running_var + module_output.num_batches_tracked = module.num_batches_tracked + for name, child in module.named_children(): + module_output.add_module(name, _convert_batchnorm(child)) + del module + return module_output + + +def _demo_mm_inputs(input_shape, num_classes): + """Create a superset of inputs needed to run test or train batches. + + Args: + input_shape (tuple): + input batch dimensions + num_classes (int): + number of semantic classes + """ + (N, C, H, W) = input_shape + rng = np.random.RandomState(0) + imgs = rng.rand(*input_shape) + segs = rng.randint( + low=0, high=num_classes - 1, size=(N, 1, H, W)).astype(np.uint8) + img_metas = [{ + 'img_shape': (H, W, C), + 'ori_shape': (H, W, C), + 'pad_shape': (H, W, C), + 'filename': '.png', + 'scale_factor': 1.0, + 'flip': False, + } for _ in range(N)] + mm_inputs = { + 'imgs': torch.FloatTensor(imgs).requires_grad_(True), + 'img_metas': img_metas, + 'gt_semantic_seg': torch.LongTensor(segs) + } + return mm_inputs + + +def _prepare_input_img(img_path, + test_pipeline, + shape=None, + rescale_shape=None): + # build the data pipeline + if shape is not None: + test_pipeline[1]['img_scale'] = (shape[1], shape[0]) + test_pipeline[1]['transforms'][0]['keep_ratio'] = False + test_pipeline = [LoadImage()] + test_pipeline[1:] + test_pipeline = Compose(test_pipeline) + # prepare data + data = dict(img=img_path) + data = test_pipeline(data) + imgs = data['img'] + img_metas = [i.data for i in data['img_metas']] + + if rescale_shape is not None: + for img_meta in img_metas: + img_meta['ori_shape'] = tuple(rescale_shape) + (3, ) + + mm_inputs = {'imgs': imgs, 'img_metas': img_metas} + + return mm_inputs + + +def _update_input_img(img_list, img_meta_list, update_ori_shape=False): + # update img and its meta list + N, C, H, W = img_list[0].shape + img_meta = img_meta_list[0][0] + img_shape = (H, W, C) + if update_ori_shape: + ori_shape = img_shape + else: + ori_shape = img_meta['ori_shape'] + pad_shape = img_shape + new_img_meta_list = [[{ + 'img_shape': + img_shape, + 'ori_shape': + ori_shape, + 'pad_shape': + pad_shape, + 'filename': + img_meta['filename'], + 'scale_factor': + (img_shape[1] / ori_shape[1], img_shape[0] / ori_shape[0]) * 2, + 'flip': + False, + } for _ in range(N)]] + + return img_list, new_img_meta_list + + +def pytorch2onnx(model, + mm_inputs, + 
opset_version=11, + show=False, + output_file='tmp.onnx', + verify=False, + dynamic_export=False): + """Export Pytorch model to ONNX model and verify the outputs are same + between Pytorch and ONNX. + + Args: + model (nn.Module): Pytorch model we want to export. + mm_inputs (dict): Contain the input tensors and img_metas information. + opset_version (int): The onnx op version. Default: 11. + show (bool): Whether print the computation graph. Default: False. + output_file (string): The path to where we store the output ONNX model. + Default: `tmp.onnx`. + verify (bool): Whether compare the outputs between Pytorch and ONNX. + Default: False. + dynamic_export (bool): Whether to export ONNX with dynamic axis. + Default: False. + """ + model.cpu().eval() + test_mode = model.test_cfg.mode + + if isinstance(model.decode_head, nn.ModuleList): + num_classes = model.decode_head[-1].num_classes + else: + num_classes = model.decode_head.num_classes + + imgs = mm_inputs.pop('imgs') + img_metas = mm_inputs.pop('img_metas') + + img_list = [img[None, :] for img in imgs] + img_meta_list = [[img_meta] for img_meta in img_metas] + # update img_meta + img_list, img_meta_list = _update_input_img(img_list, img_meta_list) + + # replace original forward function + origin_forward = model.forward + model.forward = partial( + model.forward, + img_metas=img_meta_list, + return_loss=False, + rescale=True) + dynamic_axes = None + if dynamic_export: + if test_mode == 'slide': + dynamic_axes = {'input': {0: 'batch'}, 'output': {1: 'batch'}} + else: + dynamic_axes = { + 'input': { + 0: 'batch', + 2: 'height', + 3: 'width' + }, + 'output': { + 1: 'batch', + 2: 'height', + 3: 'width' + } + } + + register_extra_symbolics(opset_version) + with torch.no_grad(): + torch.onnx.export( + model, (img_list, ), + output_file, + input_names=['input'], + output_names=['output'], + export_params=True, + keep_initializers_as_inputs=False, + verbose=show, + opset_version=opset_version, + dynamic_axes=dynamic_axes) + print(f'Successfully exported ONNX model: {output_file}') + model.forward = origin_forward + + if verify: + # check by onnx + import onnx + onnx_model = onnx.load(output_file) + onnx.checker.check_model(onnx_model) + + if dynamic_export and test_mode == 'whole': + # scale image for dynamic shape test + img_list = [resize(_, scale_factor=1.5) for _ in img_list] + # concate flip image for batch test + flip_img_list = [_.flip(-1) for _ in img_list] + img_list = [ + torch.cat((ori_img, flip_img), 0) + for ori_img, flip_img in zip(img_list, flip_img_list) + ] + + # update img_meta + img_list, img_meta_list = _update_input_img( + img_list, img_meta_list, test_mode == 'whole') + + # check the numerical value + # get pytorch output + with torch.no_grad(): + pytorch_result = model(img_list, img_meta_list, return_loss=False) + pytorch_result = np.stack(pytorch_result, 0) + + # get onnx output + input_all = [node.name for node in onnx_model.graph.input] + input_initializer = [ + node.name for node in onnx_model.graph.initializer + ] + net_feed_input = list(set(input_all) - set(input_initializer)) + assert (len(net_feed_input) == 1) + sess = rt.InferenceSession(output_file) + onnx_result = sess.run( + None, {net_feed_input[0]: img_list[0].detach().numpy()})[0][0] + # show segmentation results + if show: + import os.path as osp + + import cv2 + img = img_meta_list[0][0]['filename'] + if not osp.exists(img): + img = imgs[0][:3, ...].permute(1, 2, 0) * 255 + img = img.detach().numpy().astype(np.uint8) + ori_shape = img.shape[:2] + else: + 
ori_shape = LoadImage()({'img': img})['ori_shape'] + + # resize onnx_result to ori_shape + onnx_result_ = cv2.resize(onnx_result[0].astype(np.uint8), + (ori_shape[1], ori_shape[0])) + show_result_pyplot( + model, + img, (onnx_result_, ), + palette=model.PALETTE, + block=False, + title='ONNXRuntime', + opacity=0.5) + + # resize pytorch_result to ori_shape + pytorch_result_ = cv2.resize(pytorch_result[0].astype(np.uint8), + (ori_shape[1], ori_shape[0])) + show_result_pyplot( + model, + img, (pytorch_result_, ), + title='PyTorch', + palette=model.PALETTE, + opacity=0.5) + # compare results + np.testing.assert_allclose( + pytorch_result.astype(np.float32) / num_classes, + onnx_result.astype(np.float32) / num_classes, + rtol=1e-5, + atol=1e-5, + err_msg='The outputs are different between Pytorch and ONNX') + print('The outputs are same between Pytorch and ONNX') + + +def parse_args(): + parser = argparse.ArgumentParser(description='Convert MMSeg to ONNX') + parser.add_argument('config', help='test config file path') + parser.add_argument('--checkpoint', help='checkpoint file', default=None) + parser.add_argument( + '--input-img', type=str, help='Images for input', default=None) + parser.add_argument( + '--show', + action='store_true', + help='show onnx graph and segmentation results') + parser.add_argument( + '--verify', action='store_true', help='verify the onnx model') + parser.add_argument('--output-file', type=str, default='tmp.onnx') + parser.add_argument('--opset-version', type=int, default=11) + parser.add_argument( + '--shape', + type=int, + nargs='+', + default=None, + help='input image height and width.') + parser.add_argument( + '--rescale_shape', + type=int, + nargs='+', + default=None, + help='output image rescale height and width, work for slide mode.') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='Override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + parser.add_argument( + '--dynamic-export', + action='store_true', + help='Whether to export onnx with dynamic axis.') + args = parser.parse_args() + return args + + +if __name__ == '__main__': + args = parse_args() + + cfg = mmcv.Config.fromfile(args.config) + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + cfg.model.pretrained = None + + if args.shape is None: + img_scale = cfg.test_pipeline[1]['img_scale'] + input_shape = (1, 3, img_scale[1], img_scale[0]) + elif len(args.shape) == 1: + input_shape = (1, 3, args.shape[0], args.shape[0]) + elif len(args.shape) == 2: + input_shape = ( + 1, + 3, + ) + tuple(args.shape) + else: + raise ValueError('invalid input shape') + + test_mode = cfg.model.test_cfg.mode + + # build the model and load checkpoint + cfg.model.train_cfg = None + segmentor = build_segmentor( + cfg.model, train_cfg=None, test_cfg=cfg.get('test_cfg')) + # convert SyncBN to BN + segmentor = _convert_batchnorm(segmentor) + + if args.checkpoint: + checkpoint = load_checkpoint( + segmentor, args.checkpoint, map_location='cpu') + segmentor.CLASSES = checkpoint['meta']['CLASSES'] + segmentor.PALETTE = checkpoint['meta']['PALETTE'] + + # read input or create dummpy input + if args.input_img is not None: + preprocess_shape = (input_shape[2], input_shape[3]) + rescale_shape = None + if args.rescale_shape is not None: + rescale_shape = [args.rescale_shape[0], args.rescale_shape[1]] + mm_inputs = _prepare_input_img( + args.input_img, + cfg.data.test.pipeline, + shape=preprocess_shape, + rescale_shape=rescale_shape) + else: + if isinstance(segmentor.decode_head, nn.ModuleList): + num_classes = segmentor.decode_head[-1].num_classes + else: + num_classes = segmentor.decode_head.num_classes + mm_inputs = _demo_mm_inputs(input_shape, num_classes) + + # convert model to onnx file + pytorch2onnx( + segmentor, + mm_inputs, + opset_version=args.opset_version, + show=args.show, + output_file=args.output_file, + verify=args.verify, + dynamic_export=args.dynamic_export) + + # Following strings of text style are from colorama package + bright_style, reset_style = '\x1b[1m', '\x1b[0m' + red_text, blue_text = '\x1b[31m', '\x1b[34m' + white_background = '\x1b[107m' + + msg = white_background + bright_style + red_text + msg += 'DeprecationWarning: This tool will be deprecated in future. ' + msg += blue_text + 'Welcome to use the unified model deployment toolbox ' + msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy' + msg += reset_style + warnings.warn(msg) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/pytorch2torchscript.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/pytorch2torchscript.py new file mode 100644 index 0000000..d76f5ec --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/pytorch2torchscript.py @@ -0,0 +1,185 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import argparse + +import mmcv +import numpy as np +import torch +import torch._C +import torch.serialization +from mmcv.runner import load_checkpoint +from torch import nn + +from mmseg.models import build_segmentor + +torch.manual_seed(3) + + +def digit_version(version_str): + digit_version = [] + for x in version_str.split('.'): + if x.isdigit(): + digit_version.append(int(x)) + elif x.find('rc') != -1: + patch_version = x.split('rc') + digit_version.append(int(patch_version[0]) - 1) + digit_version.append(int(patch_version[1])) + return digit_version + + +def check_torch_version(): + torch_minimum_version = '1.8.0' + torch_version = digit_version(torch.__version__) + + assert (torch_version >= digit_version(torch_minimum_version)), \ + f'Torch=={torch.__version__} is not support for converting to ' \ + f'torchscript. Please install pytorch>={torch_minimum_version}.' + + +def _convert_batchnorm(module): + module_output = module + if isinstance(module, torch.nn.SyncBatchNorm): + module_output = torch.nn.BatchNorm2d(module.num_features, module.eps, + module.momentum, module.affine, + module.track_running_stats) + if module.affine: + module_output.weight.data = module.weight.data.clone().detach() + module_output.bias.data = module.bias.data.clone().detach() + # keep requires_grad unchanged + module_output.weight.requires_grad = module.weight.requires_grad + module_output.bias.requires_grad = module.bias.requires_grad + module_output.running_mean = module.running_mean + module_output.running_var = module.running_var + module_output.num_batches_tracked = module.num_batches_tracked + for name, child in module.named_children(): + module_output.add_module(name, _convert_batchnorm(child)) + del module + return module_output + + +def _demo_mm_inputs(input_shape, num_classes): + """Create a superset of inputs needed to run test or train batches. + + Args: + input_shape (tuple): + input batch dimensions + num_classes (int): + number of semantic classes + """ + (N, C, H, W) = input_shape + rng = np.random.RandomState(0) + imgs = rng.rand(*input_shape) + segs = rng.randint( + low=0, high=num_classes - 1, size=(N, 1, H, W)).astype(np.uint8) + img_metas = [{ + 'img_shape': (H, W, C), + 'ori_shape': (H, W, C), + 'pad_shape': (H, W, C), + 'filename': '.png', + 'scale_factor': 1.0, + 'flip': False, + } for _ in range(N)] + mm_inputs = { + 'imgs': torch.FloatTensor(imgs).requires_grad_(True), + 'img_metas': img_metas, + 'gt_semantic_seg': torch.LongTensor(segs) + } + return mm_inputs + + +def pytorch2libtorch(model, + input_shape, + show=False, + output_file='tmp.pt', + verify=False): + """Export Pytorch model to TorchScript model and verify the outputs are + same between Pytorch and TorchScript. + + Args: + model (nn.Module): Pytorch model we want to export. + input_shape (tuple): Use this input shape to construct + the corresponding dummy input and execute the model. + show (bool): Whether print the computation graph. Default: False. + output_file (string): The path to where we store the + output TorchScript model. Default: `tmp.pt`. + verify (bool): Whether compare the outputs between + Pytorch and TorchScript. Default: False. 
+ """ + if isinstance(model.decode_head, nn.ModuleList): + num_classes = model.decode_head[-1].num_classes + else: + num_classes = model.decode_head.num_classes + + mm_inputs = _demo_mm_inputs(input_shape, num_classes) + + imgs = mm_inputs.pop('imgs') + + # replace the original forword with forward_dummy + model.forward = model.forward_dummy + model.eval() + traced_model = torch.jit.trace( + model, + example_inputs=imgs, + check_trace=verify, + ) + + if show: + print(traced_model.graph) + + traced_model.save(output_file) + print('Successfully exported TorchScript model: {}'.format(output_file)) + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert MMSeg to TorchScript') + parser.add_argument('config', help='test config file path') + parser.add_argument('--checkpoint', help='checkpoint file', default=None) + parser.add_argument( + '--show', action='store_true', help='show TorchScript graph') + parser.add_argument( + '--verify', action='store_true', help='verify the TorchScript model') + parser.add_argument('--output-file', type=str, default='tmp.pt') + parser.add_argument( + '--shape', + type=int, + nargs='+', + default=[512, 512], + help='input image size (height, width)') + args = parser.parse_args() + return args + + +if __name__ == '__main__': + args = parse_args() + check_torch_version() + + if len(args.shape) == 1: + input_shape = (1, 3, args.shape[0], args.shape[0]) + elif len(args.shape) == 2: + input_shape = ( + 1, + 3, + ) + tuple(args.shape) + else: + raise ValueError('invalid input shape') + + cfg = mmcv.Config.fromfile(args.config) + cfg.model.pretrained = None + + # build the model and load checkpoint + cfg.model.train_cfg = None + segmentor = build_segmentor( + cfg.model, train_cfg=None, test_cfg=cfg.get('test_cfg')) + # convert SyncBN to BN + segmentor = _convert_batchnorm(segmentor) + + if args.checkpoint: + load_checkpoint(segmentor, args.checkpoint, map_location='cpu') + + # convert the PyTorch model to LibTorch model + pytorch2libtorch( + segmentor, + input_shape, + show=args.show, + output_file=args.output_file, + verify=args.verify) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/slurm_test.sh b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/slurm_test.sh new file mode 100644 index 0000000..4e6f7bf --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/slurm_test.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +set -x + +PARTITION=$1 +JOB_NAME=$2 +CONFIG=$3 +CHECKPOINT=$4 +GPUS=${GPUS:-4} +GPUS_PER_NODE=${GPUS_PER_NODE:-4} +CPUS_PER_TASK=${CPUS_PER_TASK:-5} +PY_ARGS=${@:5} +SRUN_ARGS=${SRUN_ARGS:-""} + +PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ +srun -p ${PARTITION} \ + --job-name=${JOB_NAME} \ + --gres=gpu:${GPUS_PER_NODE} \ + --ntasks=${GPUS} \ + --ntasks-per-node=${GPUS_PER_NODE} \ + --cpus-per-task=${CPUS_PER_TASK} \ + --kill-on-bad-exit=1 \ + ${SRUN_ARGS} \ + python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/slurm_train.sh b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/slurm_train.sh new file mode 100644 index 0000000..ab23210 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/slurm_train.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +set -x + +PARTITION=$1 +JOB_NAME=$2 +CONFIG=$3 +GPUS=${GPUS:-4} +GPUS_PER_NODE=${GPUS_PER_NODE:-4} +CPUS_PER_TASK=${CPUS_PER_TASK:-5} +SRUN_ARGS=${SRUN_ARGS:-""} +PY_ARGS=${@:4} + +PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ +srun -p ${PARTITION} \ 
+ --job-name=${JOB_NAME} \ + --gres=gpu:${GPUS_PER_NODE} \ + --ntasks=${GPUS} \ + --ntasks-per-node=${GPUS_PER_NODE} \ + --cpus-per-task=${CPUS_PER_TASK} \ + --kill-on-bad-exit=1 \ + ${SRUN_ARGS} \ + python -u tools/train.py ${CONFIG} --launcher="slurm" ${PY_ARGS} diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/test.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/test.py new file mode 100644 index 0000000..a643b08 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/test.py @@ -0,0 +1,320 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os +import os.path as osp +import shutil +import time +import warnings + +import mmcv +import torch +from mmcv.cnn.utils import revert_sync_batchnorm +from mmcv.runner import (get_dist_info, init_dist, load_checkpoint, + wrap_fp16_model) +from mmcv.utils import DictAction + +from mmseg import digit_version +from mmseg.apis import multi_gpu_test, single_gpu_test +from mmseg.datasets import build_dataloader, build_dataset +from mmseg.models import build_segmentor +from mmseg.utils import build_ddp, build_dp, get_device, setup_multi_processes + + +def parse_args(): + parser = argparse.ArgumentParser( + description='mmseg test (and eval) a model') + parser.add_argument('config', help='test config file path') + parser.add_argument('checkpoint', help='checkpoint file') + parser.add_argument( + '--work-dir', + help=('if specified, the evaluation metric results will be dumped' + 'into the directory as json')) + parser.add_argument( + '--aug-test', action='store_true', help='Use Flip and Multi scale aug') + parser.add_argument('--out', help='output result file in pickle format') + parser.add_argument( + '--format-only', + action='store_true', + help='Format the output results without perform evaluation. It is' + 'useful when you want to format the result to a specific format and ' + 'submit it to the test server') + parser.add_argument( + '--eval', + type=str, + nargs='+', + help='evaluation metrics, which depends on the dataset, e.g., "mIoU"' + ' for generic datasets, and "cityscapes" for Cityscapes') + parser.add_argument('--show', action='store_true', help='show results') + parser.add_argument( + '--show-dir', help='directory where painted images will be saved') + parser.add_argument( + '--gpu-collect', + action='store_true', + help='whether to use gpu to collect results.') + parser.add_argument( + '--gpu-id', + type=int, + default=0, + help='id of gpu to use ' + '(only applicable to non-distributed testing)') + parser.add_argument( + '--tmpdir', + help='tmp directory used for collecting results from multiple ' + 'workers, available when gpu_collect is not specified') + parser.add_argument( + '--options', + nargs='+', + action=DictAction, + help="--options is deprecated in favor of --cfg_options' and it will " + 'not be supported in version v0.22.0. Override some settings in the ' + 'used config, the key-value pair in xxx=yyy format will be merged ' + 'into config file. If the value to be overwritten is a list, it ' + 'should be like key="[a,b]" or key=a,b It also allows nested ' + 'list/tuple values, e.g. key="[(a,b),(c,d)]" Note that the quotation ' + 'marks are necessary and that no white space is allowed.') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. 
If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + parser.add_argument( + '--eval-options', + nargs='+', + action=DictAction, + help='custom options for evaluation') + parser.add_argument( + '--launcher', + choices=['none', 'pytorch', 'slurm', 'mpi'], + default='none', + help='job launcher') + parser.add_argument( + '--opacity', + type=float, + default=0.5, + help='Opacity of painted segmentation map. In (0, 1] range.') + parser.add_argument('--local_rank', type=int, default=0) + args = parser.parse_args() + if 'LOCAL_RANK' not in os.environ: + os.environ['LOCAL_RANK'] = str(args.local_rank) + + if args.options and args.cfg_options: + raise ValueError( + '--options and --cfg-options cannot be both ' + 'specified, --options is deprecated in favor of --cfg-options. ' + '--options will not be supported in version v0.22.0.') + if args.options: + warnings.warn('--options is deprecated in favor of --cfg-options. ' + '--options will not be supported in version v0.22.0.') + args.cfg_options = args.options + + return args + + +def main(): + args = parse_args() + assert args.out or args.eval or args.format_only or args.show \ + or args.show_dir, \ + ('Please specify at least one operation (save/eval/format/show the ' + 'results / save the results) with the argument "--out", "--eval"' + ', "--format-only", "--show" or "--show-dir"') + + if args.eval and args.format_only: + raise ValueError('--eval and --format_only cannot be both specified') + + if args.out is not None and not args.out.endswith(('.pkl', '.pickle')): + raise ValueError('The output file must be a pkl file.') + + cfg = mmcv.Config.fromfile(args.config) + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + + # set multi-process settings + setup_multi_processes(cfg) + + # set cudnn_benchmark + if cfg.get('cudnn_benchmark', False): + torch.backends.cudnn.benchmark = True + if args.aug_test: + # hard code index + cfg.data.test.pipeline[1].img_ratios = [ + 0.5, 0.75, 1.0, 1.25, 1.5, 1.75 + ] + cfg.data.test.pipeline[1].flip = True + cfg.model.pretrained = None + cfg.data.test.test_mode = True + + if args.gpu_id is not None: + cfg.gpu_ids = [args.gpu_id] + + # init distributed env first, since logger depends on the dist info. 
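+    # With `--launcher none` the test runs on a single GPU (cfg.gpu_ids[0]);
+    # any other launcher initializes torch.distributed from cfg.dist_params.
+    # Illustrative distributed invocation (arguments are examples only):
+    #   python -m torch.distributed.launch --nproc_per_node=4 \
+    #       tools/test.py CONFIG CHECKPOINT --launcher pytorch --eval mIoU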
+ if args.launcher == 'none': + cfg.gpu_ids = [args.gpu_id] + distributed = False + if len(cfg.gpu_ids) > 1: + warnings.warn(f'The gpu-ids is reset from {cfg.gpu_ids} to ' + f'{cfg.gpu_ids[0:1]} to avoid potential error in ' + 'non-distribute testing time.') + cfg.gpu_ids = cfg.gpu_ids[0:1] + else: + distributed = True + init_dist(args.launcher, **cfg.dist_params) + + rank, _ = get_dist_info() + # allows not to create + if args.work_dir is not None and rank == 0: + mmcv.mkdir_or_exist(osp.abspath(args.work_dir)) + timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) + if args.aug_test: + json_file = osp.join(args.work_dir, + f'eval_multi_scale_{timestamp}.json') + else: + json_file = osp.join(args.work_dir, + f'eval_single_scale_{timestamp}.json') + elif rank == 0: + work_dir = osp.join('./work_dirs', + osp.splitext(osp.basename(args.config))[0]) + mmcv.mkdir_or_exist(osp.abspath(work_dir)) + timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) + if args.aug_test: + json_file = osp.join(work_dir, + f'eval_multi_scale_{timestamp}.json') + else: + json_file = osp.join(work_dir, + f'eval_single_scale_{timestamp}.json') + + # build the dataloader + # TODO: support multiple images per gpu (only minor changes are needed) + dataset = build_dataset(cfg.data.test) + # The default loader config + loader_cfg = dict( + # cfg.gpus will be ignored if distributed + num_gpus=len(cfg.gpu_ids), + dist=distributed, + shuffle=False) + # The overall dataloader settings + loader_cfg.update({ + k: v + for k, v in cfg.data.items() if k not in [ + 'train', 'val', 'test', 'train_dataloader', 'val_dataloader', + 'test_dataloader' + ] + }) + test_loader_cfg = { + **loader_cfg, + 'samples_per_gpu': 1, + 'shuffle': False, # Not shuffle by default + **cfg.data.get('test_dataloader', {}) + } + # build the dataloader + data_loader = build_dataloader(dataset, **test_loader_cfg) + + # build the model and load checkpoint + cfg.model.train_cfg = None + model = build_segmentor(cfg.model, test_cfg=cfg.get('test_cfg')) + fp16_cfg = cfg.get('fp16', None) + if fp16_cfg is not None: + wrap_fp16_model(model) + checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu') + if 'CLASSES' in checkpoint.get('meta', {}): + model.CLASSES = checkpoint['meta']['CLASSES'] + else: + print('"CLASSES" not found in meta, use dataset.CLASSES instead') + model.CLASSES = dataset.CLASSES + if 'PALETTE' in checkpoint.get('meta', {}): + model.PALETTE = checkpoint['meta']['PALETTE'] + else: + print('"PALETTE" not found in meta, use dataset.PALETTE instead') + model.PALETTE = dataset.PALETTE + + # clean gpu memory when starting a new evaluation. 
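+    # `--eval-options` key=value pairs land in eval_kwargs below and are
+    # forwarded to dataset.evaluate() / dataset.format_results(), e.g.
+    # (illustrative) `--eval-options imgfile_prefix=./format_results`.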
+ torch.cuda.empty_cache() + eval_kwargs = {} if args.eval_options is None else args.eval_options + + # Deprecated + efficient_test = eval_kwargs.get('efficient_test', False) + if efficient_test: + warnings.warn( + '``efficient_test=True`` does not have effect in tools/test.py, ' + 'the evaluation and format results are CPU memory efficient by ' + 'default') + + eval_on_format_results = ( + args.eval is not None and 'cityscapes' in args.eval) + if eval_on_format_results: + assert len(args.eval) == 1, 'eval on format results is not ' \ + 'applicable for metrics other than ' \ + 'cityscapes' + if args.format_only or eval_on_format_results: + if 'imgfile_prefix' in eval_kwargs: + tmpdir = eval_kwargs['imgfile_prefix'] + else: + tmpdir = '.format_cityscapes' + eval_kwargs.setdefault('imgfile_prefix', tmpdir) + mmcv.mkdir_or_exist(tmpdir) + else: + tmpdir = None + + cfg.device = get_device() + if not distributed: + warnings.warn( + 'SyncBN is only supported with DDP. To be compatible with DP, ' + 'we convert SyncBN to BN. Please use dist_train.sh which can ' + 'avoid this error.') + if not torch.cuda.is_available(): + assert digit_version(mmcv.__version__) >= digit_version('1.4.4'), \ + 'Please use MMCV >= 1.4.4 for CPU training!' + model = revert_sync_batchnorm(model) + model = build_dp(model, cfg.device, device_ids=cfg.gpu_ids) + results = single_gpu_test( + model, + data_loader, + args.show, + args.show_dir, + False, + args.opacity, + pre_eval=args.eval is not None and not eval_on_format_results, + format_only=args.format_only or eval_on_format_results, + format_args=eval_kwargs) + else: + model = build_ddp( + model, + cfg.device, + device_ids=[int(os.environ['LOCAL_RANK'])], + broadcast_buffers=False) + results = multi_gpu_test( + model, + data_loader, + args.tmpdir, + args.gpu_collect, + False, + pre_eval=args.eval is not None and not eval_on_format_results, + format_only=args.format_only or eval_on_format_results, + format_args=eval_kwargs) + + rank, _ = get_dist_info() + if rank == 0: + if args.out: + warnings.warn( + 'The behavior of ``args.out`` has been changed since MMSeg ' + 'v0.16, the pickled outputs could be seg map as type of ' + 'np.array, pre-eval results or file paths for ' + '``dataset.format_results()``.') + print(f'\nwriting results to {args.out}') + mmcv.dump(results, args.out) + if args.eval: + eval_kwargs.update(metric=args.eval) + metric = dataset.evaluate(results, **eval_kwargs) + metric_dict = dict(config=args.config, metric=metric) + mmcv.dump(metric_dict, json_file, indent=4) + if tmpdir is not None and eval_on_format_results: + # remove tmp dir when cityscapes evaluation + shutil.rmtree(tmpdir) + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/test_multiple_checkpoint_mmsegmentation.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/test_multiple_checkpoint_mmsegmentation.py new file mode 100644 index 0000000..3866659 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/test_multiple_checkpoint_mmsegmentation.py @@ -0,0 +1,325 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
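+#
+# Variant of tools/test.py that evaluates a whole directory of per-epoch
+# checkpoints (epoch_1.pth ... epoch_70.pth) in one run, writing one metrics
+# JSON per epoch. Illustrative invocation (paths are examples only):
+#   python tools/test_multiple_checkpoint_mmsegmentation.py CONFIG \
+#       work_dirs/my_run --eval mIoU --work-dir work_dirs/my_run/eval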
+import argparse +import os +import os.path as osp +import shutil +import time +import warnings + +import mmcv +import torch +from mmcv.cnn.utils import revert_sync_batchnorm +from mmcv.runner import (get_dist_info, init_dist, load_checkpoint, + wrap_fp16_model) +from mmcv.utils import DictAction + +from mmseg import digit_version +from mmseg.apis import multi_gpu_test, single_gpu_test +from mmseg.datasets import build_dataloader, build_dataset +from mmseg.models import build_segmentor +from mmseg.utils import build_ddp, build_dp, get_device, setup_multi_processes + + +def parse_args(): + parser = argparse.ArgumentParser( + description='mmseg test (and eval) a model') + parser.add_argument('config', help='test config file path') + parser.add_argument('checkpoint', help='checkpoint file') + parser.add_argument( + '--work-dir', + help=('if specified, the evaluation metric results will be dumped' + 'into the directory as json')) + parser.add_argument( + '--aug-test', action='store_true', help='Use Flip and Multi scale aug') + parser.add_argument('--out', help='output result file in pickle format') + parser.add_argument( + '--format-only', + action='store_true', + help='Format the output results without perform evaluation. It is' + 'useful when you want to format the result to a specific format and ' + 'submit it to the test server') + parser.add_argument( + '--eval', + type=str, + nargs='+', + help='evaluation metrics, which depends on the dataset, e.g., "mIoU"' + ' for generic datasets, and "cityscapes" for Cityscapes') + parser.add_argument('--show', action='store_true', help='show results') + parser.add_argument( + '--show-dir', help='directory where painted images will be saved') + parser.add_argument( + '--gpu-collect', + action='store_true', + help='whether to use gpu to collect results.') + parser.add_argument( + '--gpu-id', + type=int, + default=0, + help='id of gpu to use ' + '(only applicable to non-distributed testing)') + parser.add_argument( + '--tmpdir', + help='tmp directory used for collecting results from multiple ' + 'workers, available when gpu_collect is not specified') + parser.add_argument( + '--options', + nargs='+', + action=DictAction, + help="--options is deprecated in favor of --cfg_options' and it will " + 'not be supported in version v0.22.0. Override some settings in the ' + 'used config, the key-value pair in xxx=yyy format will be merged ' + 'into config file. If the value to be overwritten is a list, it ' + 'should be like key="[a,b]" or key=a,b It also allows nested ' + 'list/tuple values, e.g. key="[(a,b),(c,d)]" Note that the quotation ' + 'marks are necessary and that no white space is allowed.') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + parser.add_argument( + '--eval-options', + nargs='+', + action=DictAction, + help='custom options for evaluation') + parser.add_argument( + '--launcher', + choices=['none', 'pytorch', 'slurm', 'mpi'], + default='none', + help='job launcher') + parser.add_argument( + '--opacity', + type=float, + default=0.5, + help='Opacity of painted segmentation map. 
In (0, 1] range.')
+    parser.add_argument('--local_rank', type=int, default=0)
+    args = parser.parse_args()
+    if 'LOCAL_RANK' not in os.environ:
+        os.environ['LOCAL_RANK'] = str(args.local_rank)
+
+    if args.options and args.cfg_options:
+        raise ValueError(
+            '--options and --cfg-options cannot be both '
+            'specified, --options is deprecated in favor of --cfg-options. '
+            '--options will not be supported in version v0.22.0.')
+    if args.options:
+        warnings.warn('--options is deprecated in favor of --cfg-options. '
+                      '--options will not be supported in version v0.22.0.')
+        args.cfg_options = args.options
+
+    return args
+
+
+def main():
+    args = parse_args()
+    assert args.out or args.eval or args.format_only or args.show \
+        or args.show_dir, \
+        ('Please specify at least one operation (save/eval/format/show the '
+         'results / save the results) with the argument "--out", "--eval"'
+         ', "--format-only", "--show" or "--show-dir"')
+
+    if args.eval and args.format_only:
+        raise ValueError('--eval and --format-only cannot both be specified')
+
+    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):
+        raise ValueError('The output file must be a pkl file.')
+###############################################################################
+# For `checkpoint`, pass only the path to the directory that contains the    #
+# per-epoch checkpoint files; `epoch_{N}.pth` is appended inside the loop.   #
+###############################################################################
+    for ckp in range(1, 71):
+        checkpoint_path = osp.join(args.checkpoint, 'epoch_' + str(ckp) + '.pth')
+        print('Now processing checkpoint:', ckp)
+        cfg = mmcv.Config.fromfile(args.config)
+        if args.cfg_options is not None:
+            cfg.merge_from_dict(args.cfg_options)
+
+        # set multi-process settings
+        setup_multi_processes(cfg)
+
+        # set cudnn_benchmark
+        if cfg.get('cudnn_benchmark', False):
+            torch.backends.cudnn.benchmark = True
+        if args.aug_test:
+            # hard code index
+            cfg.data.test.pipeline[1].img_ratios = [
+                0.5, 0.75, 1.0, 1.25, 1.5, 1.75
+            ]
+            cfg.data.test.pipeline[1].flip = True
+        cfg.model.pretrained = None
+        cfg.data.test.test_mode = True
+
+        if args.gpu_id is not None:
+            cfg.gpu_ids = [args.gpu_id]
+
+        # init distributed env first, since logger depends on the dist info.
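+        # NOTE: everything below (distributed init, dataloader and model
+        # construction, evaluation) runs once per checkpoint because the
+        # whole block sits inside the epoch loop above.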
+ if args.launcher == 'none': + cfg.gpu_ids = [args.gpu_id] + distributed = False + if len(cfg.gpu_ids) > 1: + warnings.warn(f'The gpu-ids is reset from {cfg.gpu_ids} to ' + f'{cfg.gpu_ids[0:1]} to avoid potential error in ' + 'non-distribute testing time.') + cfg.gpu_ids = cfg.gpu_ids[0:1] + else: + distributed = True + init_dist(args.launcher, **cfg.dist_params) + + rank, _ = get_dist_info() + # allows not to create + if args.work_dir is not None and rank == 0: + mmcv.mkdir_or_exist(osp.abspath(args.work_dir)) + timestamp = "epoch_" + str(ckp) + if args.aug_test: + json_file = osp.join(args.work_dir, + f'eval_multi_scale_{timestamp}.json') + else: + json_file = osp.join(args.work_dir, + f'eval_single_scale_{timestamp}.json') + elif rank == 0: + work_dir = osp.join('./work_dirs', + osp.splitext(osp.basename(args.config))[0]) + mmcv.mkdir_or_exist(osp.abspath(work_dir)) + timestamp = "epoch_" + str(ckp) + if args.aug_test: + json_file = osp.join(work_dir, + f'eval_multi_scale_{timestamp}.json') + else: + json_file = osp.join(work_dir, + f'eval_single_scale_{timestamp}.json') + + # build the dataloader + # TODO: support multiple images per gpu (only minor changes are needed) + dataset = build_dataset(cfg.data.test) + # The default loader config + loader_cfg = dict( + # cfg.gpus will be ignored if distributed + num_gpus=len(cfg.gpu_ids), + dist=distributed, + shuffle=False) + # The overall dataloader settings + loader_cfg.update({ + k: v + for k, v in cfg.data.items() if k not in [ + 'train', 'val', 'test', 'train_dataloader', 'val_dataloader', + 'test_dataloader' + ] + }) + test_loader_cfg = { + **loader_cfg, + 'samples_per_gpu': 1, + 'shuffle': False, # Not shuffle by default + **cfg.data.get('test_dataloader', {}) + } + # build the dataloader + data_loader = build_dataloader(dataset, **test_loader_cfg) + + # build the model and load checkpoint + cfg.model.train_cfg = None + model = build_segmentor(cfg.model, test_cfg=cfg.get('test_cfg')) + fp16_cfg = cfg.get('fp16', None) + if fp16_cfg is not None: + wrap_fp16_model(model) + checkpoint = load_checkpoint(model, checkpoint_path, map_location='cpu') + if 'CLASSES' in checkpoint.get('meta', {}): + model.CLASSES = checkpoint['meta']['CLASSES'] + else: + print('"CLASSES" not found in meta, use dataset.CLASSES instead') + model.CLASSES = dataset.CLASSES + if 'PALETTE' in checkpoint.get('meta', {}): + model.PALETTE = checkpoint['meta']['PALETTE'] + else: + print('"PALETTE" not found in meta, use dataset.PALETTE instead') + model.PALETTE = dataset.PALETTE + + # clean gpu memory when starting a new evaluation. 
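+        # Frees cached CUDA memory left over from the previous checkpoint's
+        # model before the next one is built and evaluated.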
+ torch.cuda.empty_cache() + eval_kwargs = {} if args.eval_options is None else args.eval_options + + # Deprecated + efficient_test = eval_kwargs.get('efficient_test', False) + if efficient_test: + warnings.warn( + '``efficient_test=True`` does not have effect in tools/test.py, ' + 'the evaluation and format results are CPU memory efficient by ' + 'default') + + eval_on_format_results = ( + args.eval is not None and 'cityscapes' in args.eval) + if eval_on_format_results: + assert len(args.eval) == 1, 'eval on format results is not ' \ + 'applicable for metrics other than ' \ + 'cityscapes' + if args.format_only or eval_on_format_results: + if 'imgfile_prefix' in eval_kwargs: + tmpdir = eval_kwargs['imgfile_prefix'] + else: + tmpdir = '.format_cityscapes' + eval_kwargs.setdefault('imgfile_prefix', tmpdir) + mmcv.mkdir_or_exist(tmpdir) + else: + tmpdir = None + + cfg.device = get_device() + if not distributed: + warnings.warn( + 'SyncBN is only supported with DDP. To be compatible with DP, ' + 'we convert SyncBN to BN. Please use dist_train.sh which can ' + 'avoid this error.') + if not torch.cuda.is_available(): + assert digit_version(mmcv.__version__) >= digit_version('1.4.4'), \ + 'Please use MMCV >= 1.4.4 for CPU training!' + model = revert_sync_batchnorm(model) + model = build_dp(model, cfg.device, device_ids=cfg.gpu_ids) + results = single_gpu_test( + model, + data_loader, + args.show, + args.show_dir, + False, + args.opacity, + pre_eval=args.eval is not None and not eval_on_format_results, + format_only=args.format_only or eval_on_format_results, + format_args=eval_kwargs) + else: + model = build_ddp( + model, + cfg.device, + device_ids=[int(os.environ['LOCAL_RANK'])], + broadcast_buffers=False) + results = multi_gpu_test( + model, + data_loader, + args.tmpdir, + args.gpu_collect, + False, + pre_eval=args.eval is not None and not eval_on_format_results, + format_only=args.format_only or eval_on_format_results, + format_args=eval_kwargs) + + rank, _ = get_dist_info() + if rank == 0: + if args.out: + warnings.warn( + 'The behavior of ``args.out`` has been changed since MMSeg ' + 'v0.16, the pickled outputs could be seg map as type of ' + 'np.array, pre-eval results or file paths for ' + '``dataset.format_results()``.') + print(f'\nwriting results to {args.out}') + mmcv.dump(results, args.out) + if args.eval: + eval_kwargs.update(metric=args.eval) + metric = dataset.evaluate(results, **eval_kwargs) + metric_dict = dict(config=args.config, metric=metric) + mmcv.dump(metric_dict, json_file, indent=4) + if tmpdir is not None and eval_on_format_results: + # remove tmp dir when cityscapes evaluation + shutil.rmtree(tmpdir) + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/torchserve/mmseg2torchserve.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/torchserve/mmseg2torchserve.py new file mode 100644 index 0000000..9063634 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/torchserve/mmseg2torchserve.py @@ -0,0 +1,111 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
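+#
+# Packages an MMSegmentation config + checkpoint into a TorchServe model
+# archive ({model_name}.mar) via torch-model-archiver's Python API, bundling
+# mmseg_handler.py as the serving handler. Illustrative invocation
+# (paths are examples only):
+#   python tools/torchserve/mmseg2torchserve.py CONFIG CHECKPOINT \
+#       --output-folder model_store --model-name my_segmentor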
+from argparse import ArgumentParser, Namespace +from pathlib import Path +from tempfile import TemporaryDirectory + +import mmcv + +try: + from model_archiver.model_packaging import package_model + from model_archiver.model_packaging_utils import ModelExportUtils +except ImportError: + package_model = None + + +def mmseg2torchserve( + config_file: str, + checkpoint_file: str, + output_folder: str, + model_name: str, + model_version: str = '1.0', + force: bool = False, +): + """Converts mmsegmentation model (config + checkpoint) to TorchServe + `.mar`. + + Args: + config_file: + In MMSegmentation config format. + The contents vary for each task repository. + checkpoint_file: + In MMSegmentation checkpoint format. + The contents vary for each task repository. + output_folder: + Folder where `{model_name}.mar` will be created. + The file created will be in TorchServe archive format. + model_name: + If not None, used for naming the `{model_name}.mar` file + that will be created under `output_folder`. + If None, `{Path(checkpoint_file).stem}` will be used. + model_version: + Model's version. + force: + If True, if there is an existing `{model_name}.mar` + file under `output_folder` it will be overwritten. + """ + mmcv.mkdir_or_exist(output_folder) + + config = mmcv.Config.fromfile(config_file) + + with TemporaryDirectory() as tmpdir: + config.dump(f'{tmpdir}/config.py') + + args = Namespace( + **{ + 'model_file': f'{tmpdir}/config.py', + 'serialized_file': checkpoint_file, + 'handler': f'{Path(__file__).parent}/mmseg_handler.py', + 'model_name': model_name or Path(checkpoint_file).stem, + 'version': model_version, + 'export_path': output_folder, + 'force': force, + 'requirements_file': None, + 'extra_files': None, + 'runtime': 'python', + 'archive_format': 'default' + }) + manifest = ModelExportUtils.generate_manifest_json(args) + package_model(args, manifest) + + +def parse_args(): + parser = ArgumentParser( + description='Convert mmseg models to TorchServe `.mar` format.') + parser.add_argument('config', type=str, help='config file path') + parser.add_argument('checkpoint', type=str, help='checkpoint file path') + parser.add_argument( + '--output-folder', + type=str, + required=True, + help='Folder where `{model_name}.mar` will be created.') + parser.add_argument( + '--model-name', + type=str, + default=None, + help='If not None, used for naming the `{model_name}.mar`' + 'file that will be created under `output_folder`.' + 'If None, `{Path(checkpoint_file).stem}` will be used.') + parser.add_argument( + '--model-version', + type=str, + default='1.0', + help='Number used for versioning.') + parser.add_argument( + '-f', + '--force', + action='store_true', + help='overwrite the existing `{model_name}.mar`') + args = parser.parse_args() + + return args + + +if __name__ == '__main__': + args = parse_args() + + if package_model is None: + raise ImportError('`torch-model-archiver` is required.' + 'Try: pip install torch-model-archiver') + + mmseg2torchserve(args.config, args.checkpoint, args.output_folder, + args.model_name, args.model_version, args.force) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/torchserve/mmseg_handler.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/torchserve/mmseg_handler.py new file mode 100644 index 0000000..28fe501 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/torchserve/mmseg_handler.py @@ -0,0 +1,56 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
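+#
+# TorchServe handler for MMSegmentation: decodes raw or base64-encoded image
+# bytes from the request, runs inference_segmentor, and returns each
+# predicted segmentation map as PNG-encoded bytes.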
+import base64 +import os + +import cv2 +import mmcv +import torch +from mmcv.cnn.utils.sync_bn import revert_sync_batchnorm +from ts.torch_handler.base_handler import BaseHandler + +from mmseg.apis import inference_segmentor, init_segmentor + + +class MMsegHandler(BaseHandler): + + def initialize(self, context): + properties = context.system_properties + self.map_location = 'cuda' if torch.cuda.is_available() else 'cpu' + self.device = torch.device(self.map_location + ':' + + str(properties.get('gpu_id')) if torch.cuda. + is_available() else self.map_location) + self.manifest = context.manifest + + model_dir = properties.get('model_dir') + serialized_file = self.manifest['model']['serializedFile'] + checkpoint = os.path.join(model_dir, serialized_file) + self.config_file = os.path.join(model_dir, 'config.py') + + self.model = init_segmentor(self.config_file, checkpoint, self.device) + self.model = revert_sync_batchnorm(self.model) + self.initialized = True + + def preprocess(self, data): + images = [] + + for row in data: + image = row.get('data') or row.get('body') + if isinstance(image, str): + image = base64.b64decode(image) + image = mmcv.imfrombytes(image) + images.append(image) + + return images + + def inference(self, data, *args, **kwargs): + results = [inference_segmentor(self.model, img) for img in data] + return results + + def postprocess(self, data): + output = [] + + for image_result in data: + _, buffer = cv2.imencode('.png', image_result[0].astype('uint8')) + content = buffer.tobytes() + output.append(content) + return output diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/torchserve/test_torchserve.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/torchserve/test_torchserve.py new file mode 100644 index 0000000..432834a --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/torchserve/test_torchserve.py @@ -0,0 +1,58 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
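+#
+# Sanity-check script: posts an image to a running TorchServe endpoint and
+# shows the server's prediction next to local PyTorch inference.
+# Illustrative invocation (assumes a model named `my_segmentor` is served):
+#   python tools/torchserve/test_torchserve.py demo.png CONFIG CHECKPOINT \
+#       my_segmentor --inference-addr 127.0.0.1:8080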
+from argparse import ArgumentParser +from io import BytesIO + +import matplotlib.pyplot as plt +import mmcv +import requests + +from mmseg.apis import inference_segmentor, init_segmentor + + +def parse_args(): + parser = ArgumentParser( + description='Compare result of torchserve and pytorch,' + 'and visualize them.') + parser.add_argument('img', help='Image file') + parser.add_argument('config', help='Config file') + parser.add_argument('checkpoint', help='Checkpoint file') + parser.add_argument('model_name', help='The model name in the server') + parser.add_argument( + '--inference-addr', + default='127.0.0.1:8080', + help='Address and port of the inference server') + parser.add_argument( + '--result-image', + type=str, + default=None, + help='save server output in result-image') + parser.add_argument( + '--device', default='cuda:0', help='Device used for inference') + + args = parser.parse_args() + return args + + +def main(args): + url = 'http://' + args.inference_addr + '/predictions/' + args.model_name + with open(args.img, 'rb') as image: + tmp_res = requests.post(url, image) + content = tmp_res.content + if args.result_image: + with open(args.result_image, 'wb') as out_image: + out_image.write(content) + plt.imshow(mmcv.imread(args.result_image, 'grayscale')) + plt.show() + else: + plt.imshow(plt.imread(BytesIO(content))) + plt.show() + model = init_segmentor(args.config, args.checkpoint, args.device) + image = mmcv.imread(args.img) + result = inference_segmentor(model, image) + plt.imshow(result[0]) + plt.show() + + +if __name__ == '__main__': + args = parse_args() + main(args) diff --git a/prediction/image/mx15hdi/Detect/mmsegmentation/tools/train.py b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/train.py new file mode 100644 index 0000000..c4219b0 --- /dev/null +++ b/prediction/image/mx15hdi/Detect/mmsegmentation/tools/train.py @@ -0,0 +1,242 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
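+#
+# Training entry point: builds the segmentor from a config, sets up logging,
+# seeding and (optionally) distributed training, then calls train_segmentor.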
+import argparse +import copy +import os +import os.path as osp +import time +import warnings + +import mmcv +import torch +import torch.distributed as dist +from mmcv.cnn.utils import revert_sync_batchnorm +from mmcv.runner import get_dist_info, init_dist +from mmcv.utils import Config, DictAction, get_git_hash + +from mmseg import __version__ +from mmseg.apis import init_random_seed, set_random_seed, train_segmentor +from mmseg.datasets import build_dataset +from mmseg.models import build_segmentor +from mmseg.utils import (collect_env, get_device, get_root_logger, + setup_multi_processes) + + +def parse_args(): + parser = argparse.ArgumentParser(description='Train a segmentor') + parser.add_argument('config', help='train config file path') + parser.add_argument('--work-dir', help='the dir to save logs and models') + parser.add_argument( + '--load-from', help='the checkpoint file to load weights from') + parser.add_argument( + '--resume-from', help='the checkpoint file to resume from') + parser.add_argument( + '--no-validate', + action='store_true', + help='whether not to evaluate the checkpoint during training') + group_gpus = parser.add_mutually_exclusive_group() + group_gpus.add_argument( + '--gpus', + type=int, + help='(Deprecated, please use --gpu-id) number of gpus to use ' + '(only applicable to non-distributed training)') + group_gpus.add_argument( + '--gpu-ids', + type=int, + nargs='+', + help='(Deprecated, please use --gpu-id) ids of gpus to use ' + '(only applicable to non-distributed training)') + group_gpus.add_argument( + '--gpu-id', + type=int, + default=0, + help='id of gpu to use ' + '(only applicable to non-distributed training)') + parser.add_argument('--seed', type=int, default=None, help='random seed') + parser.add_argument( + '--diff_seed', + action='store_true', + help='Whether or not set different seeds for different ranks') + parser.add_argument( + '--deterministic', + action='store_true', + help='whether to set deterministic options for CUDNN backend.') + parser.add_argument( + '--options', + nargs='+', + action=DictAction, + help="--options is deprecated in favor of --cfg_options' and it will " + 'not be supported in version v0.22.0. Override some settings in the ' + 'used config, the key-value pair in xxx=yyy format will be merged ' + 'into config file. If the value to be overwritten is a list, it ' + 'should be like key="[a,b]" or key=a,b It also allows nested ' + 'list/tuple values, e.g. key="[(a,b),(c,d)]" Note that the quotation ' + 'marks are necessary and that no white space is allowed.') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + parser.add_argument( + '--launcher', + choices=['none', 'pytorch', 'slurm', 'mpi'], + default='none', + help='job launcher') + parser.add_argument('--local_rank', type=int, default=0) + parser.add_argument( + '--auto-resume', + action='store_true', + help='resume from the latest checkpoint automatically.') + args = parser.parse_args() + if 'LOCAL_RANK' not in os.environ: + os.environ['LOCAL_RANK'] = str(args.local_rank) + + if args.options and args.cfg_options: + raise ValueError( + '--options and --cfg-options cannot be both ' + 'specified, --options is deprecated in favor of --cfg-options. ' + '--options will not be supported in version v0.22.0.') + if args.options: + warnings.warn('--options is deprecated in favor of --cfg-options. ' + '--options will not be supported in version v0.22.0.') + args.cfg_options = args.options + + return args + + +def main(): + args = parse_args() + + cfg = Config.fromfile(args.config) + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + + # set cudnn_benchmark + if cfg.get('cudnn_benchmark', False): + torch.backends.cudnn.benchmark = True + + # work_dir is determined in this priority: CLI > segment in file > filename + if args.work_dir is not None: + # update configs according to CLI args if args.work_dir is not None + cfg.work_dir = args.work_dir + elif cfg.get('work_dir', None) is None: + # use config filename as default work_dir if cfg.work_dir is None + cfg.work_dir = osp.join('./work_dirs', + osp.splitext(osp.basename(args.config))[0]) + if args.load_from is not None: + cfg.load_from = args.load_from + if args.resume_from is not None: + cfg.resume_from = args.resume_from + if args.gpus is not None: + cfg.gpu_ids = range(1) + warnings.warn('`--gpus` is deprecated because we only support ' + 'single GPU mode in non-distributed training. ' + 'Use `gpus=1` now.') + if args.gpu_ids is not None: + cfg.gpu_ids = args.gpu_ids[0:1] + warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. ' + 'Because we only support single GPU mode in ' + 'non-distributed training. Use the first GPU ' + 'in `gpu_ids` now.') + if args.gpus is None and args.gpu_ids is None: + cfg.gpu_ids = [args.gpu_id] + + cfg.auto_resume = args.auto_resume + + # init distributed env first, since logger depends on the dist info. 
+ if args.launcher == 'none': + distributed = False + else: + distributed = True + init_dist(args.launcher, **cfg.dist_params) + # gpu_ids is used to calculate iter when resuming checkpoint + _, world_size = get_dist_info() + cfg.gpu_ids = range(world_size) + + # create work_dir + mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir)) + # dump config + cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config))) + # init the logger before other steps + timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) + log_file = osp.join(cfg.work_dir, f'{timestamp}.log') + logger = get_root_logger(log_file=log_file, log_level=cfg.log_level) + + # set multi-process settings + setup_multi_processes(cfg) + + # init the meta dict to record some important information such as + # environment info and seed, which will be logged + meta = dict() + # log env info + env_info_dict = collect_env() + env_info = '\n'.join([f'{k}: {v}' for k, v in env_info_dict.items()]) + dash_line = '-' * 60 + '\n' + logger.info('Environment info:\n' + dash_line + env_info + '\n' + + dash_line) + meta['env_info'] = env_info + + # log some basic info + logger.info(f'Distributed training: {distributed}') + logger.info(f'Config:\n{cfg.pretty_text}') + + # set random seeds + cfg.device = get_device() + seed = init_random_seed(args.seed, device=cfg.device) + seed = seed + dist.get_rank() if args.diff_seed else seed + logger.info(f'Set random seed to {seed}, ' + f'deterministic: {args.deterministic}') + set_random_seed(seed, deterministic=args.deterministic) + cfg.seed = seed + meta['seed'] = seed + meta['exp_name'] = osp.basename(args.config) + + model = build_segmentor( + cfg.model, + train_cfg=cfg.get('train_cfg'), + test_cfg=cfg.get('test_cfg')) + model.init_weights() + + # SyncBN is not support for DP + if not distributed: + warnings.warn( + 'SyncBN is only supported with DDP. To be compatible with DP, ' + 'we convert SyncBN to BN. 
Please use dist_train.sh which can ' + 'avoid this error.') + model = revert_sync_batchnorm(model) + + logger.info(model) + + datasets = [build_dataset(cfg.data.train)] + if len(cfg.workflow) == 2: + val_dataset = copy.deepcopy(cfg.data.val) + val_dataset.pipeline = cfg.data.train.pipeline + datasets.append(build_dataset(val_dataset)) + if cfg.checkpoint_config is not None: + # save mmseg version, config file content and class names in + # checkpoints as meta data + cfg.checkpoint_config.meta = dict( + mmseg_version=f'{__version__}+{get_git_hash()[:7]}', + config=cfg.pretty_text, + CLASSES=datasets[0].CLASSES, + PALETTE=datasets[0].PALETTE) + # add an attribute for visualization convenience + model.CLASSES = datasets[0].CLASSES + # passing checkpoint meta for saving best checkpoint + meta.update(cfg.checkpoint_config.meta) + train_segmentor( + model, + datasets, + cfg, + distributed=distributed, + validate=(not args.no_validate), + timestamp=timestamp, + meta=meta) + + +if __name__ == '__main__': + main() diff --git a/prediction/image/mx15hdi/Georeference/Scripts/Create_Georeferenced_Images_mx15hdi.py b/prediction/image/mx15hdi/Georeference/Scripts/Create_Georeferenced_Images_mx15hdi.py new file mode 100644 index 0000000..392ffe5 --- /dev/null +++ b/prediction/image/mx15hdi/Georeference/Scripts/Create_Georeferenced_Images_mx15hdi.py @@ -0,0 +1,208 @@ +import numpy as np +import pandas as pd +from pyproj import CRS, Transformer +from osgeo import gdal, osr +import os +import cv2 +from skimage import io +from scipy.interpolate import griddata +from rasterio.transform import from_origin +import rasterio +import matplotlib.pyplot as plt +from tqdm import tqdm +import sys + +np.set_printoptions(precision=30) + +def dms2degrees(dms): + dms = dms.astype(float) + return dms[:, 0] + dms[:, 1] / 60 + dms[:, 2] / 3600 + +def rot3d(cam_va, cam_ha): + rot_x = np.array([[1, 0, 0], + [0, np.cos(cam_va), -np.sin(cam_va)], + [0, np.sin(cam_va), np.cos(cam_va)]]) + + rot_z = np.array([[np.cos(cam_ha), -np.sin(cam_ha), 0], + [np.sin(cam_ha), np.cos(cam_ha), 0], + [0, 0, 1]]) + return rot_x @ rot_z + +# Get parameter from command line +if len(sys.argv) < 2: + raise ValueError("파라미터가 제공되지 않았습니다. 
폴더 이름을 명령줄 인자로 입력해주세요.") +param = sys.argv[1] + +# Define paths with parameter +csv_path = os.path.join('../Metadata/CSV', param, 'mx15hdi_interpolation.csv') +result_path = os.path.join('../Detect/result', param) +mask_result_path = os.path.join('../Detect/Mask_result', param) +gsd_path = os.path.join('GSD', param) +tif_path = os.path.join('../Georeference/Tif', param) +mask_tif_path = os.path.join('../Georeference/Mask_Tif', param) + +# Check if input paths exist +if not os.path.exists(csv_path): + raise FileNotFoundError(f"CSV 파일이 존재하지 않습니다: {csv_path}") +if not os.path.exists(result_path): + raise FileNotFoundError(f"결과 이미지 폴더가 존재하지 않습니다: {result_path}") +if not os.path.exists(mask_result_path): + raise FileNotFoundError(f"마스크 이미지 폴더가 존재하지 않습니다: {mask_result_path}") + +# Load CSVs +data = pd.read_csv(csv_path) +data2 = pd.read_csv('../Georeference/Scripts/modifycsv2gcppoint.csv') + +n_data = data.shape[0] +t_pos = data[['Tlat_d', 'Tlat_m', 'Tlat_s', 'Tlon_d', 'Tlon_m', 'Tlon_s']].values +a_pos = data[['Alat_d', 'Alat_m', 'Alat_s', 'Alon_d', 'Alon_m', 'Alon_s']].values +cam_alt = data['Alt'].values + +cam_ang = data[['Az', 'El']].values + +t_pos_int = np.zeros((n_data, 2)) +a_pos_int = np.zeros((n_data, 2)) +t_pos_int[:, 0] = dms2degrees(t_pos[:, :3]) +t_pos_int[:, 1] = dms2degrees(t_pos[:, 3:]) +a_pos_int[:, 0] = dms2degrees(a_pos[:, :3]) +a_pos_int[:, 1] = dms2degrees(a_pos[:, 3:]) + +crs = CRS.from_epsg(5187) +transformer = Transformer.from_crs("EPSG:4326", crs, always_xy=True) + +t_pos_tm = np.zeros((n_data, 3)) +a_pos_tm = np.zeros((n_data, 3)) +t_pos_tm[:, :2] = np.column_stack(transformer.transform(t_pos_int[:, 1], t_pos_int[:, 0])) +a_pos_tm[:, :2] = np.column_stack(transformer.transform(a_pos_int[:, 1], a_pos_int[:, 0])) +t_pos_tm[:, 2] = 0 +a_pos_tm[:, 2] = 0 +a_pos_tm[:, 2] = cam_alt * 0.3048 + +# Main loop +for idx in tqdm(range(n_data)): + filename = data.Filename[idx].lstrip('/') + file_path = os.path.join(result_path, filename) + if not os.path.exists(file_path): + continue + + im = io.imread(file_path) + # im = cv2.imread(file_paths) + im = cv2.flip(im, 0) + h, w = im.shape[:2] + + # 내부 파라미터 + io_data = np.zeros((4,1)) + io_data[:2,0] = np.array([1+w, 1+h]) / 2 + io_data[2:4,0] = np.array([7709.45875793727, 3835.18509357639]) + cx, cy = io_data[0, 0], io_data[1, 0] + fx, fy = io_data[2, 0], io_data[3, 0] + + # 회전 행렬 + dst_h = np.linalg.norm(t_pos_tm[idx, :2] - a_pos_tm[idx, :2]) + dst_v = a_pos_tm[idx, 2] + cam_va = -np.arctan(dst_v / dst_h) + cam_ha = np.arctan2(t_pos_tm[idx, 1] - a_pos_tm[idx, 1], t_pos_tm[idx, 0] - a_pos_tm[idx, 0]) - np.pi / 2 + r_cg = rot3d(cam_va, cam_ha) + + # 픽셀 grid + u, v = np.meshgrid(np.arange(w), np.arange(h)) + x = (u - cx) + y = (cy - v) * fx / fy + z = -fx * np.ones_like(x) + rays = np.stack([x, y, z], axis=-1) + rays_world = rays @ r_cg.T + + camera_pos = a_pos_tm[idx] + scale = -camera_pos[2] / rays_world[..., 2] + ground_xy = camera_pos[:2] + rays_world[..., :2] * scale[..., np.newaxis] + + # GSD 계산 + # dx = np.gradient(ground_xy[..., 0], axis=1) + # dy = np.gradient(ground_xy[..., 1], axis=0) + # gsd_map = np.sqrt(dx**2 + dy**2) + + # 출력 폴더 준비 + # os.makedirs(gsd_path, exist_ok=True) + # base_name = os.path.splitext(os.path.basename(filename))[0] + + # GSD 저장 + #gsd_txt_path = os.path.join('GSD', base_name + "_gsd.txt") + #np.savetxt(gsd_txt_path, gsd_map, fmt="%.6f") + + # 평균 GSD 저장 + #avg_gsd = np.mean(gsd_map) + #avg_gsd_path = os.path.join('GSD', base_name + "_Average_gsd.txt") + #with open(avg_gsd_path, 'w') as f: + # 
f.write(f"{avg_gsd:.6f}\n") + + # GSD 히트맵 저장 + #heatmap_path = os.path.join('GSD', base_name + "_gsd_heatmap.png") + #plt.imsave(heatmap_path, gsd_map, cmap='hot') + + #print(f"Average GSD for {filename}: {avg_gsd:.4f} meters/pixel") + + # 보간용 정규 grid 정의 + min_x, min_y = np.min(ground_xy[..., 0]), np.min(ground_xy[..., 1]) + max_x, max_y = np.max(ground_xy[..., 0]), np.max(ground_xy[..., 1]) + pixel_size = 0.2 + out_w = int(np.ceil((max_x - min_x) / pixel_size)) + out_h = int(np.ceil((max_y - min_y) / pixel_size)) + + grid_x, grid_y = np.meshgrid( + np.linspace(min_x, max_x, out_w), + np.linspace(max_y, min_y, out_h) + ) + + points = ground_xy.reshape(-1, 2) + values = im.reshape(-1, im.shape[2]) + grid_points = np.stack([grid_x.ravel(), grid_y.ravel()], axis=-1) + interp_rgb = griddata(points, values, grid_points, method='nearest') + interp_rgb = interp_rgb.reshape((out_h, out_w, im.shape[2])) + # 추가: 색 보존을 위해 0~255 범위로 클리핑 후 uint8 변환 + interp_rgb = np.clip(interp_rgb, 0, 255).astype(np.uint8) + + name_without_extension = data.Filename[idx].replace(".png", "") + + # 원본 이미지 저장 + os.makedirs(tif_path, exist_ok=True) + output_path = os.path.join(tif_path, f'{name_without_extension}_gsd.tif') + transform = from_origin(min_x, max_y, pixel_size, pixel_size) + with rasterio.open( + output_path, + 'w', + driver='GTiff', + height=out_h, + width=out_w, + count=3, + dtype=interp_rgb.dtype, + crs=rasterio.crs.CRS.from_epsg(5187), + transform=transform + ) as dst: + for i in range(3): + dst.write(interp_rgb[:, :, i], i + 1) + + # 마스크 이미지 처리 + file_path2 = os.path.join(mask_result_path, filename) + if os.path.exists(file_path2): + mask_im = io.imread(file_path2) + mask_im = cv2.flip(mask_im, 0) + values_mask = mask_im.reshape(-1, mask_im.shape[2]) + interp_mask = griddata(points, values_mask, grid_points, method='nearest') + interp_mask = interp_mask.reshape((out_h, out_w, mask_im.shape[2])) + interp_mask = np.clip(interp_mask, 0, 255).astype(np.uint8) + + os.makedirs(mask_tif_path, exist_ok=True) + output_path2 = os.path.join(mask_tif_path, f'{name_without_extension}_gsd.tif') + with rasterio.open( + output_path2, + 'w', + driver='GTiff', + height=out_h, + width=out_w, + count=3, + dtype=interp_mask.dtype, + crs=rasterio.crs.CRS.from_epsg(5187), + transform=transform + ) as dst2: + for i in range(3): + dst2.write(interp_mask[:, :, i], i + 1) diff --git a/prediction/image/mx15hdi/Georeference/Scripts/Create_Georeferenced_Images_nadir.py b/prediction/image/mx15hdi/Georeference/Scripts/Create_Georeferenced_Images_nadir.py new file mode 100644 index 0000000..1eb2602 --- /dev/null +++ b/prediction/image/mx15hdi/Georeference/Scripts/Create_Georeferenced_Images_nadir.py @@ -0,0 +1,225 @@ +import numpy as np +import pandas as pd +from pyproj import CRS, Transformer +from osgeo import gdal, osr +import os +import cv2 +from pathlib import Path +from skimage import io +from rasterio.transform import from_origin +import rasterio +import matplotlib.pyplot as plt +from tqdm import tqdm +import sys +# scipy.ndimage.binary_dilation은 cv2.dilate로 교체하여 제거 + +np.set_printoptions(precision=30) + +_SCRIPTS_DIR = Path(__file__).parent # mx15hdi/Georeference/Scripts/ +_MX15HDI_DIR = _SCRIPTS_DIR.parent.parent # mx15hdi/ + + +def dms2degrees(dms): + dms = dms.astype(float) + return dms[:, 0] + dms[:, 1] / 60 + dms[:, 2] / 3600 + + +def run_georeference(file_id: str, inference_cache: dict = None) -> dict: + """ + file_id 기준으로 세그멘테이션 결과 이미지를 GeoTIFF로 변환한다. + + Args: + file_id: 처리할 세션 식별자. 
+ inference_cache: run_inference()의 반환값. 있으면 디스크 읽기 생략. + {image_filename: {'blended': ndarray, 'mask': ndarray, 'ext': str}} + + Returns: + georef_cache: {image_filename: {'mask': ndarray, 'transform': ..., 'crs': ...}} + 이 값을 run_oilshape()에 전달하면 Mask_Tif 디스크 읽기를 생략할 수 있다. + + 결과: mx15hdi/Georeference/Tif/{file_id}/ 에 blended GeoTIFF 저장. + Mask_Tif는 georef_cache로 반환하여 디스크 저장 생략. + """ + csv_path = str(_MX15HDI_DIR / 'Metadata' / 'CSV' / file_id / 'mx15hdi_interpolation.csv') + result_path = str(_MX15HDI_DIR / 'Detect' / 'result' / file_id) + mask_result_path = str(_MX15HDI_DIR / 'Detect' / 'Mask_result' / file_id) + tif_path = str(_MX15HDI_DIR / 'Georeference' / 'Tif' / file_id) + + # CSV는 metadata export 단계에서 생성됨 (필수) + if not os.path.exists(csv_path): + raise FileNotFoundError(f"CSV 파일이 존재하지 않습니다: {csv_path}") + + # inference_cache 없이 디스크 폴백 사용 시 경로 확인 + if inference_cache is None: + if not os.path.exists(result_path): + raise FileNotFoundError(f"결과 이미지 폴더가 존재하지 않습니다: {result_path}") + if not os.path.exists(mask_result_path): + raise FileNotFoundError(f"마스크 이미지 폴더가 존재하지 않습니다: {mask_result_path}") + + # Load CSV + data = pd.read_csv(csv_path) + n_data = data.shape[0] + + # Extract aircraft position + a_pos = data[['Alat_d', 'Alat_m', 'Alat_s', 'Alon_d', 'Alon_m', 'Alon_s']].values + cam_alt = data['Alt'].values + + # Convert DMS to decimal degrees + a_pos_int = np.zeros((n_data, 2)) + a_pos_int[:, 0] = dms2degrees(a_pos[:, :3]) # latitude + a_pos_int[:, 1] = dms2degrees(a_pos[:, 3:]) # longitude + + # Transform to EPSG:5187 + crs = CRS.from_epsg(5187) + transformer = Transformer.from_crs("EPSG:4326", crs, always_xy=True) + + a_pos_tm = np.zeros((n_data, 3)) + a_pos_tm[:, :2] = np.column_stack(transformer.transform(a_pos_int[:, 1], a_pos_int[:, 0])) + a_pos_tm[:, 2] = cam_alt * 0.3048 # feet to meters + + # Camera intrinsic parameters (Hasselblad L2D-20c) + FOCAL_LENGTH_MM = 12.29 + SENSOR_WIDTH_MM = 17.3 + SENSOR_HEIGHT_MM = 13.0 + + # dilate 커널 루프 외부에서 1회 생성 + dilate_kernel = np.ones((3, 3), np.uint8) + + georef_cache = {} + + for idx in tqdm(range(n_data)): + filename = data.Filename[idx].lstrip('/') + + # --- 이미지 로딩: inference_cache 우선, 없으면 디스크 폴백 --- + if inference_cache is not None and filename in inference_cache: + im = inference_cache[filename]['blended'] + mask_im = inference_cache[filename]['mask'] + else: + file_path = os.path.join(result_path, filename) + if not os.path.exists(file_path): + base_name, ext = os.path.splitext(filename) + found = False + for try_ext in [ext.lower(), ext.upper(), ext.capitalize()]: + try_path = os.path.join(result_path, base_name + try_ext) + if os.path.exists(try_path): + file_path = try_path + filename = base_name + try_ext + found = True + break + if not found: + raise FileNotFoundError(f"파일이 존재하지 않습니다: {file_path}") + im = io.imread(file_path) + file_path2 = os.path.join(mask_result_path, filename) + mask_im = io.imread(file_path2) if os.path.exists(file_path2) else None + + h, w = im.shape[:2] + + # Focal length in pixels + fx = (FOCAL_LENGTH_MM / SENSOR_WIDTH_MM) * w + fy = (FOCAL_LENGTH_MM / SENSOR_HEIGHT_MM) * h + cx = w / 2.0 + cy = h / 2.0 + + camera_x, camera_y, camera_z = a_pos_tm[idx] + + # Pixel coordinate grid → ground coordinates + u, v = np.meshgrid(np.arange(w), np.arange(h)) + x_normalized = (u - cx) / fx + y_normalized = (cy - v) / fy + scale = camera_z + + ground_x = camera_x + x_normalized * scale + ground_y = camera_y + y_normalized * scale + + min_x, min_y = np.min(ground_x), np.min(ground_y) + max_x, max_y = 
np.max(ground_x), np.max(ground_y)
+
+        pixel_size = camera_z * SENSOR_WIDTH_MM / (FOCAL_LENGTH_MM * w) / 1000
+        pixel_size = max(0.01, pixel_size)
+
+        out_w = int(np.ceil((max_x - min_x) / pixel_size))
+        out_h = int(np.ceil((max_y - min_y) / pixel_size))
+
+        MAX_DIMENSION = 50000
+        if out_w > MAX_DIMENSION or out_h > MAX_DIMENSION:
+            print(f"Warning: Output size too large ({out_w}x{out_h}). Adjusting pixel size.")
+            pixel_size = max((max_x - min_x) / MAX_DIMENSION, (max_y - min_y) / MAX_DIMENSION)
+            out_w = int(np.ceil((max_x - min_x) / pixel_size))
+            out_h = int(np.ceil((max_y - min_y) / pixel_size))
+            print(f"Adjusted output size: {out_w}x{out_h}, pixel_size: {pixel_size:.4f}m")
+
+        output_rgb = np.zeros((out_h, out_w, 3), dtype=np.uint8)
+
+        out_col = ((ground_x - min_x) / pixel_size).astype(np.int32)
+        out_row = ((max_y - ground_y) / pixel_size).astype(np.int32)
+        valid_mask = (out_col >= 0) & (out_col < out_w) & (out_row >= 0) & (out_row < out_h)
+
+        output_rgb[out_row[valid_mask], out_col[valid_mask]] = im[valid_mask]
+
+        # --- Hole filling: scipy binary_dilation dropped, unified on cv2.dilate ---
+        # fill_mask is reused for both rgb and mask to avoid duplicate work
+        fill_mask = None
+
+        if np.sum(output_rgb == 0) > 0:
+            fill_mask = np.any(output_rgb > 0, axis=2).astype(np.uint8)
+            for _ in range(2):
+                fill_mask = cv2.dilate(fill_mask, dilate_kernel)
+            bool_mask = fill_mask > 0
+            for c in range(3):
+                channel = output_rgb[:, :, c]
+                dilated = cv2.dilate(channel, dilate_kernel)
+                channel[bool_mask & (channel == 0)] = dilated[bool_mask & (channel == 0)]
+
+        # Save blended GeoTIFF (needed by the API /get-image/ endpoint, so the disk write stays)
+        name_without_extension = data.Filename[idx].replace(".png", "").replace(".jpg", "").replace(".tif", "")
+        os.makedirs(tif_path, exist_ok=True)
+        output_path = os.path.join(tif_path, f'{name_without_extension}_gsd.tif')
+        transform = from_origin(min_x, max_y, pixel_size, pixel_size)
+
+        with rasterio.open(
+                output_path, 'w',
+                driver='GTiff',
+                height=out_h, width=out_w, count=3,
+                dtype=output_rgb.dtype,
+                crs=rasterio.crs.CRS.from_epsg(5187),
+                transform=transform,
+                compress='lzw'
+        ) as dst:
+            for i in range(3):
+                dst.write(output_rgb[:, :, i], i + 1)
+
+        print(f"Saved: {output_path} (Size: {out_w}x{out_h}, GSD: {pixel_size*100:.2f}cm)")
+
+        # --- Mask handling: skip the disk write and return the mask via georef_cache ---
+        output_mask = None
+        if mask_im is not None:
+            output_mask = np.zeros((out_h, out_w, 3), dtype=np.uint8)
+            output_mask[out_row[valid_mask], out_col[valid_mask]] = mask_im[valid_mask]
+
+            # Reuse the fill_mask computed for the rgb image; it was already
+            # dilated twice above, so no further dilation is needed here.
+            if fill_mask is not None and np.sum(output_mask == 0) > 0:
+                bool_mask = fill_mask > 0
+                for c in range(3):
+                    channel = output_mask[:, :, c]
+                    dilated = cv2.dilate(channel, dilate_kernel)
+                    channel[bool_mask & (channel == 0)] = dilated[bool_mask & (channel == 0)]
+
+        # Store the mask array plus transform info in georef_cache
+        # (run_oilshape can then work without reading Mask_Tif from disk)
+        georef_cache[filename] = {
+            'mask': output_mask,  # (H, W, 3) uint8 or None
+            'transform': transform,
+            'crs': rasterio.crs.CRS.from_epsg(5187),
+        }
+
+    print("Processing complete!")
+    return georef_cache
+
+
+if __name__ == '__main__':
+    if len(sys.argv) < 2:
+        raise ValueError("파라미터가 제공되지 않았습니다. 
폴더 이름을 명령줄 인자로 입력해주세요.") + run_georeference(sys.argv[1]) diff --git a/prediction/image/mx15hdi/Georeference/Scripts/Create_Georeferenced_Images_nadir_2.py b/prediction/image/mx15hdi/Georeference/Scripts/Create_Georeferenced_Images_nadir_2.py new file mode 100644 index 0000000..ead659f --- /dev/null +++ b/prediction/image/mx15hdi/Georeference/Scripts/Create_Georeferenced_Images_nadir_2.py @@ -0,0 +1,306 @@ +import numpy as np +import pandas as pd +from pyproj import CRS, Transformer +from osgeo import gdal, osr +import os +import cv2 +from skimage import io +from rasterio.transform import from_origin +import rasterio +import matplotlib.pyplot as plt +from tqdm import tqdm +import sys +from datetime import datetime + +np.set_printoptions(precision=30) + +def dms2degrees(dms): + dms = dms.astype(float) + return dms[:, 0] + dms[:, 1] / 60 + dms[:, 2] / 3600 + +# Get parameter from command line +if len(sys.argv) < 2: + raise ValueError("파라미터가 제공되지 않았습니다. 폴더 이름을 명령줄 인자로 입력해주세요.") +param = sys.argv[1] + +print(f"\n========== 지오레퍼런싱 시작: {param} ==========\n") + +# Define paths with parameter +csv_path = os.path.join('../Metadata/CSV', param, 'mx15hdi_interpolation.csv') +result_path = os.path.join('../Detect/result', param) +mask_result_path = os.path.join('../Detect/Mask_result', param) +gsd_path = os.path.join('GSD', param) +tif_path = os.path.join('../Georeference/Tif', param) +mask_tif_path = os.path.join('../Georeference/Mask_Tif', param) + +print(f"CSV 경로: {csv_path}") +print(f"결과 이미지 경로: {result_path}") +print(f"마스크 이미지 경로: {mask_result_path}\n") + +# Check if input paths exist +if not os.path.exists(csv_path): + print(f"[ERROR] CSV 파일이 존재하지 않습니다: {csv_path}") + raise FileNotFoundError(f"CSV 파일이 존재하지 않습니다: {csv_path}") +if not os.path.exists(result_path): + print(f"[ERROR] 결과 이미지 폴더가 존재하지 않습니다: {result_path}") + raise FileNotFoundError(f"결과 이미지 폴더가 존재하지 않습니다: {result_path}") +if not os.path.exists(mask_result_path): + print(f"[ERROR] 마스크 이미지 폴더가 존재하지 않습니다: {mask_result_path}") + raise FileNotFoundError(f"마스크 이미지 폴더가 존재하지 않습니다: {mask_result_path}") + +# Load CSV +print("CSV 파일 로딩 중...") +data = pd.read_csv(csv_path) +n_data = data.shape[0] +print(f"총 {n_data}개의 이미지 데이터 로드 완료\n") + +# Extract aircraft position (which is same as target for nadir imaging) +a_pos = data[['Alat_d', 'Alat_m', 'Alat_s', 'Alon_d', 'Alon_m', 'Alon_s']].values +cam_alt = data['Alt'].values + +print(f"고도 범위: {cam_alt.min():.2f} ~ {cam_alt.max():.2f} feet") + +# Convert DMS to decimal degrees +a_pos_int = np.zeros((n_data, 2)) +a_pos_int[:, 0] = dms2degrees(a_pos[:, :3]) # latitude +a_pos_int[:, 1] = dms2degrees(a_pos[:, 3:]) # longitude + +print(f"위도 범위: {a_pos_int[:, 0].min():.6f} ~ {a_pos_int[:, 0].max():.6f}") +print(f"경도 범위: {a_pos_int[:, 1].min():.6f} ~ {a_pos_int[:, 1].max():.6f}") + +# Transform to projected coordinate system (EPSG:5187) +print("\n좌표 변환 중 (EPSG:4326 -> EPSG:5187)...") +crs = CRS.from_epsg(5187) +transformer = Transformer.from_crs("EPSG:4326", crs, always_xy=True) + +a_pos_tm = np.zeros((n_data, 3)) +a_pos_tm[:, :2] = np.column_stack(transformer.transform(a_pos_int[:, 1], a_pos_int[:, 0])) +a_pos_tm[:, 2] = cam_alt * 0.3048 # feet to meters + +print(f"변환된 X 범위: {a_pos_tm[:, 0].min():.2f} ~ {a_pos_tm[:, 0].max():.2f} m") +print(f"변환된 Y 범위: {a_pos_tm[:, 1].min():.2f} ~ {a_pos_tm[:, 1].max():.2f} m") +print(f"고도 범위 (m): {a_pos_tm[:, 2].min():.2f} ~ {a_pos_tm[:, 2].max():.2f} m") + +# Camera intrinsic parameters (Hasselblad L2D-20c) +FOCAL_LENGTH_MM = 12.29 +SENSOR_WIDTH_MM = 17.3 # 4/3" sensor 
+SENSOR_HEIGHT_MM = 13.0 + +print(f"\n카메라 파라미터 - 초점거리: {FOCAL_LENGTH_MM}mm, 센서: {SENSOR_WIDTH_MM}x{SENSOR_HEIGHT_MM}mm") + +# 처리 통계 +success_count = 0 +skip_count = 0 +error_count = 0 + +# Main loop +print("\n이미지 처리 시작...\n") +for idx in tqdm(range(n_data)): + filename = data.Filename[idx].lstrip('/') + file_path = os.path.join(result_path, filename) + + print(f"\n[{idx+1}/{n_data}] 처리 중: {filename}") + + if not os.path.exists(file_path): + print(f" ⚠ 파일이 존재하지 않음: {file_path}") + skip_count += 1 + continue + + try: + # Read image + print(f" 이미지 읽기 시작...") + im = io.imread(file_path) + h, w = im.shape[:2] + print(f" 이미지 크기: {w}x{h}, 채널: {im.shape[2] if len(im.shape) > 2 else 1}") + + # Calculate focal length in pixels + fx = (FOCAL_LENGTH_MM / SENSOR_WIDTH_MM) * w + fy = (FOCAL_LENGTH_MM / SENSOR_HEIGHT_MM) * h + + # Principal point (image center) + cx = w / 2.0 + cy = h / 2.0 + + print(f" 초점거리 (픽셀): fx={fx:.2f}, fy={fy:.2f}") + print(f" 주점: cx={cx:.2f}, cy={cy:.2f}") + + # Aircraft position + camera_x, camera_y, camera_z = a_pos_tm[idx] + print(f" 카메라 위치: X={camera_x:.2f}m, Y={camera_y:.2f}m, Z={camera_z:.2f}m") + + # Create pixel coordinate grid + print(f" 픽셀 좌표 그리드 생성 중...") + u, v = np.meshgrid(np.arange(w), np.arange(h)) + + # Convert pixel coordinates to normalized camera coordinates + x_normalized = (u - cx) / fx + y_normalized = (cy - v) / fy + + # Calculate ground coordinates + scale = camera_z + print(f" 스케일 팩터 (고도): {scale:.2f}m") + + ground_x = camera_x + x_normalized * scale + ground_y = camera_y + y_normalized * scale + + # Calculate output bounds + min_x, min_y = np.min(ground_x), np.min(ground_y) + max_x, max_y = np.max(ground_x), np.max(ground_y) + + print(f" 지상 좌표 범위: X=[{min_x:.2f}, {max_x:.2f}], Y=[{min_y:.2f}, {max_y:.2f}]") + print(f" 커버 영역: {max_x - min_x:.2f}m x {max_y - min_y:.2f}m") + + # Define output pixel size (GSD) + pixel_size = camera_z * SENSOR_WIDTH_MM / (FOCAL_LENGTH_MM * w) / 1000 + pixel_size = max(0.01, pixel_size) + print(f" GSD (Ground Sample Distance): {pixel_size*100:.2f}cm/pixel") + + # Calculate output dimensions + out_w = int(np.ceil((max_x - min_x) / pixel_size)) + out_h = int(np.ceil((max_y - min_y) / pixel_size)) + + print(f" 출력 이미지 크기: {out_w}x{out_h} pixels") + + # Sanity check for output size + MAX_DIMENSION = 50000 + if out_w > MAX_DIMENSION or out_h > MAX_DIMENSION: + print(f" ⚠ 출력 크기가 너무 큼 ({out_w}x{out_h}). 픽셀 크기 조정...") + pixel_size = max((max_x - min_x) / MAX_DIMENSION, (max_y - min_y) / MAX_DIMENSION) + out_w = int(np.ceil((max_x - min_x) / pixel_size)) + out_h = int(np.ceil((max_y - min_y) / pixel_size)) + print(f" 조정된 출력 크기: {out_w}x{out_h}, GSD: {pixel_size*100:.2f}cm") + + # Initialize output arrays + print(f" 출력 배열 초기화 중...") + output_rgb = np.zeros((out_h, out_w, 3), dtype=np.uint8) + + # Map each input pixel to output grid + print(f" 픽셀 매핑 계산 중...") + out_col = ((ground_x - min_x) / pixel_size).astype(np.int32) + out_row = ((max_y - ground_y) / pixel_size).astype(np.int32) + + # Clip to valid range + valid_mask = (out_col >= 0) & (out_col < out_w) & (out_row >= 0) & (out_row < out_h) + valid_pixels = np.sum(valid_mask) + total_pixels = valid_mask.size + print(f" 유효 픽셀: {valid_pixels}/{total_pixels} ({valid_pixels/total_pixels*100:.1f}%)") + + # Fill output image + print(f" 출력 이미지 채우기 중...") + output_rgb[out_row[valid_mask], out_col[valid_mask]] = im[valid_mask] + + # Fill small holes + zero_pixels_before = np.sum(output_rgb == 0) + if zero_pixels_before > 0: + print(f" 빈 픽셀 채우기 중... 
(빈 픽셀: {zero_pixels_before})") + from scipy.ndimage import binary_dilation + mask = np.any(output_rgb > 0, axis=2) + for iteration in range(2): + mask = binary_dilation(mask) + for c in range(3): + channel = output_rgb[:, :, c] + dilated = cv2.dilate(channel, np.ones((3,3), np.uint8)) + channel[mask & (channel == 0)] = dilated[mask & (channel == 0)] + + zero_pixels_after = np.sum(output_rgb == 0) + print(f" 채우기 완료: {zero_pixels_before} -> {zero_pixels_after} 빈 픽셀") + + # Save georeferenced image + # 선행 '/'가 제거된 filename 사용 — data.Filename[idx]에 '/'가 남아 있으면 os.path.join이 절대 경로로 해석함 + name_without_extension = filename.replace(".png", "").replace(".jpg", "").replace(".tif", "") + + os.makedirs(tif_path, exist_ok=True) + output_path = os.path.join(tif_path, f'{name_without_extension}_gsd.tif') + transform = from_origin(min_x, max_y, pixel_size, pixel_size) + + print(f" GeoTIFF 저장 중: {output_path}") + with rasterio.open( + output_path, + 'w', + driver='GTiff', + height=out_h, + width=out_w, + count=3, + dtype=output_rgb.dtype, + crs=rasterio.crs.CRS.from_epsg(5187), + transform=transform, + compress='lzw' + ) as dst: + for i in range(3): + dst.write(output_rgb[:, :, i], i + 1) + + file_size_mb = os.path.getsize(output_path) / (1024 * 1024) + print(f" RGB 저장 완료: {output_path}") + print(f" 파일 크기: {file_size_mb:.2f}MB") + + # Process mask image + file_path2 = os.path.join(mask_result_path, filename) + if os.path.exists(file_path2): + print(f" 마스크 이미지 처리 중...") + mask_im = io.imread(file_path2) + print(f" 마스크 크기: {mask_im.shape}") + + # Initialize output mask + output_mask = np.zeros((out_h, out_w, 3), dtype=np.uint8) + + # Map mask pixels to output grid + output_mask[out_row[valid_mask], out_col[valid_mask]] = mask_im[valid_mask] + + # Fill small holes + if np.sum(output_mask == 0) > 0: + print(f" 마스크 빈 픽셀 채우기 중...") + from scipy.ndimage import binary_dilation # RGB 채우기 분기를 건너뛴 경우 NameError가 나지 않도록 여기서도 임포트 + mask = np.any(output_mask > 0, axis=2) + for _ in range(2): + mask = binary_dilation(mask) + for c in range(3): + channel = output_mask[:, :, c] + dilated = cv2.dilate(channel, np.ones((3,3), np.uint8)) + channel[mask & (channel == 0)] = dilated[mask & (channel == 0)] + + os.makedirs(mask_tif_path, exist_ok=True) + output_path2 = os.path.join(mask_tif_path, f'{name_without_extension}_gsd.tif') + + print(f" 마스크 GeoTIFF 저장 중: {output_path2}") + with rasterio.open( + output_path2, + 'w', + driver='GTiff', + height=out_h, + width=out_w, + count=3, + dtype=output_mask.dtype, + crs=rasterio.crs.CRS.from_epsg(5187), + transform=transform, + compress='lzw' + ) as dst2: + for i in range(3): + dst2.write(output_mask[:, :, i], i + 1) + + mask_size_mb = os.path.getsize(output_path2) / (1024 * 1024) + print(f" 마스크 저장 완료: {output_path2}") + print(f" 파일 크기: {mask_size_mb:.2f}MB") + else: + print(f" 마스크 파일 없음: {file_path2}") + + success_count += 1 + print(f" 처리 완료!") + + except Exception as e: + error_count += 1 + print(f" 오류 발생: {str(e)}") + print(f" 파일: {filename}") + print(f" 인덱스: {idx}") + import traceback + traceback.print_exc() + continue + +# 최종 통계 +print("\n" + "="*60) +print("처리 완료!") +print(f"총 이미지: {n_data}개") +print(f"성공: {success_count}개") +print(f"건너뜀: {skip_count}개") +print(f"오류: {error_count}개") +print("="*60) + +print("\nProcessing complete!") + diff --git a/prediction/image/mx15hdi/Georeference/Scripts/modifycsv2gcppoint.csv b/prediction/image/mx15hdi/Georeference/Scripts/modifycsv2gcppoint.csv new file mode 100644 index 0000000..b7b7c79 --- /dev/null +++ b/prediction/image/mx15hdi/Georeference/Scripts/modifycsv2gcppoint.csv @@ -0,0 +1,16 @@ +"Filename x1 y1 x2 y2",x1,y1,x2,y2 +"11126001 01.jpg ",1033,90,1724,263 +"11126001 02.jpg ",1033,91,1726,265
+"11126001 03.jpg ",1043,101,1729,276 +"11126001 04.jpg ",1076,67,1783,236 +"11126001 06.jpg ",1112,3,1847,171 +"11126001 07.jpg ",1100,5,1847,199 +"11126001 08.jpg ",1079,92,1757,283 +"11126001 09.jpg ",1041,259,1482,396 +"11126001 10.jpg ",1037,290,1407,411 +11126001 11.jpg,1049,244,1425,373 +"11126001 12.jpg ",1075,308,1370,421 +"11126001 13.jpg ",1094,344,1344,447 +11126001 14.jpg,1174,283,1441,404 +"11126001 15.jpg ",1254,242,1534,374 +"11126001 16.jpg ",1276,218,1538,359 diff --git a/prediction/image/mx15hdi/Main/Combine_module.py b/prediction/image/mx15hdi/Main/Combine_module.py new file mode 100644 index 0000000..64ad199 --- /dev/null +++ b/prediction/image/mx15hdi/Main/Combine_module.py @@ -0,0 +1,38 @@ +import sys +from pathlib import Path + +_MAIN_DIR = Path(__file__).parent # mx15hdi/Main/ +_MX15HDI_DIR = _MAIN_DIR.parent # mx15hdi/ + +# 파이프라인 모듈 임포트 +sys.path.insert(0, str(_MX15HDI_DIR / 'Detect')) +sys.path.insert(0, str(_MX15HDI_DIR / 'Metadata' / 'Scripts')) +sys.path.insert(0, str(_MX15HDI_DIR / 'Georeference' / 'Scripts')) +sys.path.insert(0, str(_MX15HDI_DIR / 'Polygon' / 'Scripts')) + +from Inference import load_model, run_inference +from Export_Metadata_mx15hdi import run_metadata_export +from Create_Georeferenced_Images_nadir import run_georeference +from Oilshape import run_oilshape + + +def run_pipeline(file_id: str, model=None): + """ + 4단계 파이프라인을 순서대로 실행한다. + model이 None이면 내부에서 1회 로드한다 (단독 실행 시). + api.py에서는 사전 로드된 model을 전달하여 재로딩을 방지한다. + """ + if model is None: + model = load_model() + + run_inference(model, file_id) + run_metadata_export(file_id) + run_georeference(file_id) + run_oilshape(file_id) + + +if __name__ == "__main__": + params = sys.argv[1:] if len(sys.argv) > 1 else [] + if not params: + raise ValueError("파라미터가 제공되지 않았습니다. 
폴더 이름을 명령줄 인자로 입력해주세요.") + run_pipeline(params[0]) diff --git a/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alat_d_empty.png b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alat_d_empty.png new file mode 100644 index 0000000..22d482b Binary files /dev/null and b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alat_d_empty.png differ diff --git a/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alat_d_roi_patch.png b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alat_d_roi_patch.png new file mode 100644 index 0000000..56044f3 Binary files /dev/null and b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alat_d_roi_patch.png differ diff --git a/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alat_m_empty.png b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alat_m_empty.png new file mode 100644 index 0000000..22d482b Binary files /dev/null and b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alat_m_empty.png differ diff --git a/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alat_m_roi_patch.png b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alat_m_roi_patch.png new file mode 100644 index 0000000..56044f3 Binary files /dev/null and b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alat_m_roi_patch.png differ diff --git a/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alat_s_empty.png b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alat_s_empty.png new file mode 100644 index 0000000..22d482b Binary files /dev/null and b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alat_s_empty.png differ diff --git a/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alat_s_roi_patch.png b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alat_s_roi_patch.png new file mode 100644 index 0000000..56044f3 Binary files /dev/null and b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alat_s_roi_patch.png differ diff --git a/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alon_d_empty.png b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alon_d_empty.png new file mode 100644 index 0000000..22d482b Binary files /dev/null and b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alon_d_empty.png differ diff --git a/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alon_d_roi_patch.png b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alon_d_roi_patch.png new file mode 100644 index 0000000..56044f3 Binary files /dev/null and b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alon_d_roi_patch.png differ diff --git a/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alon_m_empty.png b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alon_m_empty.png new file mode 100644 index 0000000..22d482b Binary files /dev/null and b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alon_m_empty.png differ diff --git a/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alon_m_roi_patch.png b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alon_m_roi_patch.png new file mode 100644 index 0000000..56044f3 Binary files /dev/null and b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alon_m_roi_patch.png differ diff --git a/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alon_s_empty.png b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alon_s_empty.png new file mode 100644 index 0000000..22d482b Binary files /dev/null and 
b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alon_s_empty.png differ diff --git a/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alon_s_roi_patch.png b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alon_s_roi_patch.png new file mode 100644 index 0000000..56044f3 Binary files /dev/null and b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alon_s_roi_patch.png differ diff --git a/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alt_empty.png b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alt_empty.png new file mode 100644 index 0000000..22d482b Binary files /dev/null and b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alt_empty.png differ diff --git a/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alt_roi_patch.png b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alt_roi_patch.png new file mode 100644 index 0000000..56044f3 Binary files /dev/null and b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Alt_roi_patch.png differ diff --git a/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Az_empty.png b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Az_empty.png new file mode 100644 index 0000000..22d482b Binary files /dev/null and b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Az_empty.png differ diff --git a/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Az_roi_patch.png b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Az_roi_patch.png new file mode 100644 index 0000000..56044f3 Binary files /dev/null and b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Az_roi_patch.png differ diff --git a/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_El_empty.png b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_El_empty.png new file mode 100644 index 0000000..22d482b Binary files /dev/null and b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_El_empty.png differ diff --git a/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_El_roi_patch.png b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_El_roi_patch.png new file mode 100644 index 0000000..56044f3 Binary files /dev/null and b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_El_roi_patch.png differ diff --git a/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlat_d_empty.png b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlat_d_empty.png new file mode 100644 index 0000000..22d482b Binary files /dev/null and b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlat_d_empty.png differ diff --git a/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlat_d_roi_patch.png b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlat_d_roi_patch.png new file mode 100644 index 0000000..56044f3 Binary files /dev/null and b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlat_d_roi_patch.png differ diff --git a/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlat_m_empty.png b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlat_m_empty.png new file mode 100644 index 0000000..22d482b Binary files /dev/null and b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlat_m_empty.png differ diff --git a/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlat_m_roi_patch.png b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlat_m_roi_patch.png new file mode 100644 index 0000000..56044f3 Binary files /dev/null and 
b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlat_m_roi_patch.png differ diff --git a/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlat_s_empty.png b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlat_s_empty.png new file mode 100644 index 0000000..22d482b Binary files /dev/null and b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlat_s_empty.png differ diff --git a/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlat_s_roi_patch.png b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlat_s_roi_patch.png new file mode 100644 index 0000000..56044f3 Binary files /dev/null and b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlat_s_roi_patch.png differ diff --git a/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlon_d_empty.png b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlon_d_empty.png new file mode 100644 index 0000000..22d482b Binary files /dev/null and b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlon_d_empty.png differ diff --git a/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlon_d_roi_patch.png b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlon_d_roi_patch.png new file mode 100644 index 0000000..56044f3 Binary files /dev/null and b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlon_d_roi_patch.png differ diff --git a/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlon_m_empty.png b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlon_m_empty.png new file mode 100644 index 0000000..22d482b Binary files /dev/null and b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlon_m_empty.png differ diff --git a/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlon_m_roi_patch.png b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlon_m_roi_patch.png new file mode 100644 index 0000000..56044f3 Binary files /dev/null and b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlon_m_roi_patch.png differ diff --git a/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlon_s_empty.png b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlon_s_empty.png new file mode 100644 index 0000000..22d482b Binary files /dev/null and b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlon_s_empty.png differ diff --git a/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlon_s_roi_patch.png b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlon_s_roi_patch.png new file mode 100644 index 0000000..56044f3 Binary files /dev/null and b/prediction/image/mx15hdi/Main/debug_empty_roi/shipimg.jpg_Tlon_s_roi_patch.png differ diff --git a/prediction/image/mx15hdi/Metadata/Scripts/Export_Metadata_mx15hdi.py b/prediction/image/mx15hdi/Metadata/Scripts/Export_Metadata_mx15hdi.py new file mode 100644 index 0000000..1b6c1cf --- /dev/null +++ b/prediction/image/mx15hdi/Metadata/Scripts/Export_Metadata_mx15hdi.py @@ -0,0 +1,395 @@ +# -*- coding: utf-8 -*- +import os +os.environ['KMP_DUPLICATE_LIB_OK']='True' +from PIL import Image +from PIL.ExifTags import TAGS, GPSTAGS +from datetime import datetime +import cv2 +import csv +from tqdm import tqdm +from PIL import Image +import numpy as np +import pandas as pd +import re +import logging +import sys +from pathlib import Path + +logging.getLogger("ppocr").setLevel(logging.WARNING) + +# PaddleOCR는 geo_info() 호출 시 1회만 초기화 (레이지 로딩) +_ocr_engine = None + +_SCRIPTS_DIR = Path(__file__).parent # mx15hdi/Metadata/Scripts/ 
+_MX15HDI_DIR = _SCRIPTS_DIR.parent.parent # mx15hdi/ + + +def _get_ocr_engine(): + """PaddleOCR 엔진을 최초 호출 시 초기화하여 반환한다.""" + global _ocr_engine + if _ocr_engine is None: + from paddleocr import PaddleOCR + _ocr_engine = PaddleOCR(use_angle_cls=False, lang='en', det=False, rec=True) + return _ocr_engine + + +def convert_gps_to_degrees(gps_coords): + """ + Pillow GPS 좌표를 도(degrees) 단위로 변환 + + Args: + gps_coords: GPS 좌표 튜플 (degrees, minutes, seconds) + + Returns: + float: 십진수 각도 + """ + try: + d = float(gps_coords[0]) + m = float(gps_coords[1]) + s = float(gps_coords[2]) + return d + (m / 60.0) + (s / 3600.0) + except (ZeroDivisionError, IndexError, AttributeError, TypeError): + return 0.0 + + +def decimal_to_dms(decimal_degrees): + """ + 십진수 좌표를 도분초(DMS) 형식으로 변환 + """ + if decimal_degrees is None: + return None, None, None, False + + is_negative = decimal_degrees < 0 + decimal_degrees = abs(decimal_degrees) + + degrees = int(decimal_degrees) + minutes_decimal = (decimal_degrees - degrees) * 60 + minutes = int(minutes_decimal) + seconds = (minutes_decimal - minutes) * 60 + + return degrees, minutes, seconds, is_negative + +class meta_info: + def extract_and_save_image_metadata(self, image_path, output_csv_path): + """ + 단일 이미지 파일에서 EXIF 정보를 추출하고 결과를 CSV 파일로 저장합니다. + + Args: + image_path: 정보를 추출할 단일 이미지 파일 경로. + output_csv_path: CSV 파일 저장 경로. + + Returns: + Dict[str, Any]: 추출된 메타데이터 정보 딕셔너리 또는 파일이 없으면 None. + """ + + # 1. 파일 존재 여부 확인 + if not os.path.exists(image_path): + print(f"파일을 찾을 수 없습니다: {image_path}") + return None + + image_path_list = os.listdir(image_path) + image_nm = image_path_list[0] + image_dir = os.path.join(image_path, image_nm) + + # 2. 이미지 정보 추출 (Pillow 사용) + info = { + 'datetime': None, + 'latitude': None, + 'longitude': None, + 'altitude': None, + 'lat_dms': (None, None, None, False), + 'lon_dms': (None, None, None, False), + 'date_parts': (None, None, None), + 'time_parts': (None, None, None) + } + + try: + # Pillow로 이미지 열기 + image = Image.open(image_dir) + + # EXIF 데이터 추출 + exifdata = image.getexif() + + if not exifdata: + print("EXIF 정보를 찾을 수 없습니다.") + # info 그대로 반환하여 빈 값이라도 CSV에 기록 + + # EXIF 태그를 딕셔너리로 변환 + exif_dict = {} + for tag_id, value in exifdata.items(): + tag = TAGS.get(tag_id, tag_id) + exif_dict[tag] = value + + # 1) 촬영시간 추출 + datetime_tags = ['DateTimeOriginal', 'DateTimeDigitized', 'DateTime'] + for tag in datetime_tags: + if tag in exif_dict: + datetime_str = str(exif_dict[tag]) + info['datetime'] = datetime_str + try: + dt_obj = datetime.strptime(datetime_str, "%Y:%m:%d %H:%M:%S") + info['date_parts'] = (dt_obj.day, dt_obj.month, dt_obj.year) + info['time_parts'] = (dt_obj.hour, dt_obj.minute, dt_obj.second) + except ValueError: + pass + break + + # 2) GPS 정보 추출 + gps_ifd = exifdata.get_ifd(0x8825) # GPS IFD 태그 + + if gps_ifd: + # GPS 데이터를 딕셔너리로 변환 + gps_dict = {} + for tag_id, value in gps_ifd.items(): + tag = GPSTAGS.get(tag_id, tag_id) + gps_dict[tag] = value + + # 위도 추출 + if 'GPSLatitude' in gps_dict and 'GPSLatitudeRef' in gps_dict: + lat = convert_gps_to_degrees(gps_dict['GPSLatitude']) + lat_ref = str(gps_dict['GPSLatitudeRef']).strip() + + if lat_ref == 'S': + lat = -lat + + info['latitude'] = lat + info['lat_dms'] = decimal_to_dms(lat) + + # 경도 추출 + if 'GPSLongitude' in gps_dict and 'GPSLongitudeRef' in gps_dict: + lon = convert_gps_to_degrees(gps_dict['GPSLongitude']) + lon_ref = str(gps_dict['GPSLongitudeRef']).strip() + + if lon_ref == 'W': + lon = -lon + + info['longitude'] = lon + info['lon_dms'] = decimal_to_dms(lon) + + # 고도 추출 + if 
'GPSAltitude' in gps_dict: + try: + altitude = float(gps_dict['GPSAltitude']) + + # 해수면 아래인지 확인 + if 'GPSAltitudeRef' in gps_dict: + altitude_ref = gps_dict['GPSAltitudeRef'] + if altitude_ref == 1: # 1 = 해수면 아래 + altitude = -altitude + + info['altitude'] = altitude + except (ValueError, TypeError): + info['altitude'] = None + + image.close() + + except Exception as e: + print(f"'{os.path.basename(image_path)}' 처리 중 오류 발생: {e}") + # 오류 발생 시에도 현재까지 추출된 info 반환 + pass + + # 3. 추출된 정보를 CSV 파일로 저장 + try: + output_dir = os.path.dirname(output_csv_path) + if output_dir and not os.path.exists(output_dir): + os.makedirs(output_dir) + + with open(output_csv_path, 'w', newline='', encoding='utf-8-sig') as csvfile: + # CSV 헤더 정의 + fieldnames = [ + 'Filename', + 'Tlat_d', + 'Tlat_m', + 'Tlat_s', + 'Tlon_d', + 'Tlon_m', + 'Tlon_s', + 'Alat_d', + 'Alat_m', + 'Alat_s', + 'Alon_d', + 'Alon_m', + 'Alon_s', + 'Az', + 'El', + 'Alt', + 'Date1', + 'Date2', + 'Date3', + 'Time1', + 'Time2', + 'Time3' + ] + + writer = csv.DictWriter(csvfile, fieldnames=fieldnames) + writer.writeheader() + + # 정보 정리 + lat_d, lat_m, lat_s, _ = info['lat_dms'] + lon_d, lon_m, lon_s, _ = info['lon_dms'] + day, month, year = info['date_parts'] + hour, minute, second = info['time_parts'] + + row = { + 'Filename': image_nm, + 'Tlat_d': lat_d if lat_d is not None else '', + 'Tlat_m': lat_m if lat_m is not None else '', + 'Tlat_s': f"{lat_s:.4f}" if lat_s is not None else '', + 'Tlon_d': lon_d if lon_d is not None else '', + 'Tlon_m': lon_m if lon_m is not None else '', + 'Tlon_s': f"{lon_s:.4f}" if lon_s is not None else '', + 'Alat_d': lat_d if lat_d is not None else '', + 'Alat_m': lat_m if lat_m is not None else '', + 'Alat_s': f"{lat_s:.4f}" if lat_s is not None else '', + 'Alon_d': lon_d if lon_d is not None else '', + 'Alon_m': lon_m if lon_m is not None else '', + 'Alon_s': f"{lon_s:.4f}" if lon_s is not None else '', + 'Az': '', + 'El': '', + 'Alt': f"{info['altitude']:.2f}" if info['altitude'] is not None else '', + 'Date1': day if day is not None else '', + 'Date2': month if month is not None else '', + 'Date3': year if year is not None else '', + 'Time1': hour if hour is not None else '', + 'Time2': minute if minute is not None else '', + 'Time3': second if second is not None else '' + } + + writer.writerow(row) + + except Exception as e: + print(f"CSV 저장 중 오류 발생: {e}") + return info # 추출된 정보 반환 + + def geo_info(self, frame_folder_dir, output, positions_csv): + print("frame_folder_dir: ", frame_folder_dir) + meta_list = ["Filename", "Tlat_d", "Tlat_m", "Tlat_s", + "Tlon_d", "Tlon_m", "Tlon_s", + "Alat_d", "Alat_m", "Alat_s", + "Alon_d", "Alon_m", "Alon_s", + "Az", "El", "Alt", + "Date1", "Date2", "Date3", + "Time1", "Time2", "Time3"] + + # Check if frame_folder_dir exists + if not os.path.exists(frame_folder_dir): + raise FileNotFoundError(f"이미지 폴더가 존재하지 않습니다: {frame_folder_dir}") + + ocr_engine = _get_ocr_engine() # 레이지 초기화 + positions = read_positions_from_csv(positions_csv) + frame_meta_list = [] + frame_nm_list = os.listdir(frame_folder_dir) + debug_dir = "debug_empty_roi" + os.makedirs(debug_dir, exist_ok=True) + + for frame_nm in tqdm(frame_nm_list): + frame_dir = os.path.join(frame_folder_dir, frame_nm) + frame = cv2.imread(frame_dir) + if frame is None: + print(f"이미지 로드 실패: {frame_dir}") + continue + + gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + _, thresh = cv2.threshold(gray, 140, 255, cv2.THRESH_BINARY) + dst2 = cv2.bitwise_not(thresh) + + frame_dict = {"Filename": frame_nm} + + for key in meta_list[1:]: + y1, 
y2, x1, x2 = positions.get(key, (0, 0, 0, 0)) + if key == "El": + x1 = max(0, x1 - 10) + x2 = min(dst2.shape[1], x2 + 20) + + roi = dst2[y1:y2, x1:x2] + + if roi is None or roi.size == 0 or roi.shape[0] == 0 or roi.shape[1] == 0: + print(f"빈 ROI 발생 - key: {key}, frame: {frame_nm}, 좌표: y({y1}-{y2}), x({x1}-{x2})") + debug_path = os.path.join(debug_dir, f"{frame_nm}_{key}_empty.png") + cv2.imwrite(debug_path, dst2) # 전체 이미지 저장 + empty_patch = np.zeros((50, 150), dtype=np.uint8) + cv2.imwrite(debug_path.replace("empty.png", "roi_patch.png"), empty_patch) + result = "" + else: + if key == "El": + roi = cv2.resize(roi, None, fx=2.5, fy=2.5, interpolation=cv2.INTER_CUBIC) + roi = cv2.GaussianBlur(roi, (3, 3), 0) + roi = cv2.adaptiveThreshold(roi, 255, cv2.ADAPTIVE_THRESH_MEAN_C, + cv2.THRESH_BINARY_INV, 11, 4) + text_result = ocr_engine.ocr(roi, cls=False) + digits = [] + for line in text_result[0]: + text = line[1][0].replace(" ", "").strip() + if re.fullmatch(r"\d+", text): + x_center = (line[0][0][0] + line[0][2][0]) / 2 + digits.append((x_center, text)) + digits_sorted = sorted(digits, key=lambda x: x[0]) + result = "".join([d[1] for d in digits_sorted]) + elif key == "Alat_d": + roi = cv2.resize(roi, None, fx=2.5, fy=2.5, interpolation=cv2.INTER_CUBIC) + roi = cv2.GaussianBlur(roi, (3, 3), 0) + roi = cv2.adaptiveThreshold(roi, 255, cv2.ADAPTIVE_THRESH_MEAN_C, + cv2.THRESH_BINARY_INV, 11, 4) + text_result = ocr_engine.ocr(roi, cls=False) + roi_center_y = roi.shape[0] / 2 + closest_box = None + closest_dist = float('inf') + result = "" + for line in text_result[0]: + text = line[1][0].replace(" ", "").strip() + conf = line[1][1] + box = line[0] + cy = (box[0][1] + box[2][1]) / 2 + if re.match(r'^-?\d+(\.\d+)?$', text): + dist = abs(cy - roi_center_y) + if dist < closest_dist: + closest_dist = dist + result = text + else: + text = ocr_engine.ocr(roi, cls=False) + if text and text[0]: + result = text[0][0][1][0].replace(" ", "").strip() + else: + result = "" + + frame_dict[key] = result + + frame_meta_list.append(frame_dict) + + os.makedirs(os.path.dirname(output), exist_ok=True) + with open(output, "w", encoding='utf-8-sig', newline='') as f: + writer = csv.DictWriter(f, fieldnames=meta_list) + writer.writeheader() + for data in frame_meta_list: + writer.writerow(data) + print(f"geo_info 생성 완료: {output}") + + def interpolation(self, input, output): + df = pd.read_csv(input) + df.ffill(inplace=True) # fillna(method='ffill') deprecated → ffill() + df.bfill(inplace=True) # fillna(method='bfill') deprecated → bfill() + df.to_csv(output, index=False, encoding='utf-8-sig') + print(f"interpolation 저장 완료: {output}") + + +def run_metadata_export(file_id: str): + """ + file_id 기준으로 EXIF 추출 + 보간 CSV를 생성한다. + 결과: mx15hdi/Metadata/CSV/{file_id}/mx15hdi_interpolation.csv + """ + img_path = str(_MX15HDI_DIR / 'Metadata' / 'Image' / 'Original_Images' / file_id) + csv_path = str(_MX15HDI_DIR / 'Metadata' / 'CSV' / file_id / 'mx15hdi.csv') + interp_csv_path = str(_MX15HDI_DIR / 'Metadata' / 'CSV' / file_id / 'mx15hdi_interpolation.csv') + + i = meta_info() + i.extract_and_save_image_metadata(image_path=img_path, output_csv_path=csv_path) + i.interpolation(input=csv_path, output=interp_csv_path) + + +if __name__ == "__main__": + # Get parameter from command line + if len(sys.argv) < 2: + raise ValueError("파라미터가 제공되지 않았습니다. 
폴더 이름을 명령줄 인자로 입력해주세요.") + param = sys.argv[1] + print("param: ", param) + run_metadata_export(param) diff --git a/prediction/image/mx15hdi/Metadata/Scripts/ocr_position_mx15hdi.csv b/prediction/image/mx15hdi/Metadata/Scripts/ocr_position_mx15hdi.csv new file mode 100644 index 0000000..6bfb5cc --- /dev/null +++ b/prediction/image/mx15hdi/Metadata/Scripts/ocr_position_mx15hdi.csv @@ -0,0 +1,22 @@ +Name,Start_Y,End_Y,Start_X,End_X +Az,967,1000,892,984 +El,480,610,67,120 +Tlat_d,988,1018,1673,1732 +Tlat_m,988,1018,1751,1805 +Tlat_s,988,1018,1822,1872 +Tlon_d,1018,1048,1640,1732 +Tlon_m,1018,1048,1751,1805 +Tlon_s,1016,1048,1822,1872 +Alat_d,988,1018,47,93 +Alat_m,988,1018,118,167 +Alat_s,988,1018,186,238 +Alon_d,1018,1048,23,93 +Alon_m,1018,1048,116,169 +Alon_s,1018,1048,186,238 +Alt,1017,1048,331,433 +Date1,28,57,23,68 +Date2,28,57,69,141 +Date3,28,57,142,238 +Time1,59,89,21,71 +Time2,59,89,94,141 +Time3,59,89,164,213 diff --git a/prediction/image/mx15hdi/Polygon/Scripts/Oilshape.py b/prediction/image/mx15hdi/Polygon/Scripts/Oilshape.py new file mode 100644 index 0000000..87d691f --- /dev/null +++ b/prediction/image/mx15hdi/Polygon/Scripts/Oilshape.py @@ -0,0 +1,249 @@ +import numpy as np +import cv2, os, rasterio +import geopandas as gpd +from shapely.geometry import Polygon +from typing import List, Tuple, Dict, Optional +from pathlib import Path +import sys + +''' Class ID: + 1: Black oil + 2: Brown oil + 3: Rainbow oil + 4: Silver oil +''' + +_SCRIPTS_DIR = Path(__file__).parent # mx15hdi/Polygon/Scripts/ +_MX15HDI_DIR = _SCRIPTS_DIR.parent.parent # mx15hdi/ + + +def get_class_mask(mask: np.ndarray, + class_colors: Dict[int, Tuple[int, int, int]], + threshold: int = 30) -> np.ndarray: + """ + Convert noisy RGB mask to a class ID mask using color distance threshold. 
+ """ + h, w, _ = mask.shape + class_mask = np.zeros((h, w), dtype=np.uint8) + + for class_id, target_color in class_colors.items(): + diff = mask.astype(np.int16) - np.array(target_color, dtype=np.int16) + dist = np.linalg.norm(diff, axis=2) + class_mask[dist < threshold] = class_id + + return class_mask + + +def mask_to_polygons(mask: np.ndarray, + class_colors: Dict[int, Tuple[int, int, int]] = None, + min_area: int = 0, + # simplify: bool = False, + simplify: bool = True, + threshold: int = 30) -> Dict[int, List[Dict]]: + + if class_colors is None: + class_colors = { + 0: (0, 0, 0), # Background + 1: (0, 0, 204), # Black oil + 2: (180, 180, 180), # Brown oil + 3: (255, 255, 0), # Rainbow oil + 4: (178, 102, 255) # Silver oil + } + + polygons = {} + if len(mask.shape) == 3: + class_mask = get_class_mask(mask, class_colors, threshold) + else: + class_mask = mask.astype(np.uint8) + + for class_id in class_colors: + if class_id == 0: + continue + + binary_mask = (class_mask == class_id).astype(np.uint8) + contours, hierarchy = cv2.findContours(binary_mask, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE) + + if hierarchy is None or len(contours) == 0: + polygons[class_id] = [] + continue + + hierarchy = hierarchy[0] + class_polygons = [] + + def is_valid(contour): + return cv2.contourArea(contour) >= min_area and len(contour) >= 3 + + for idx, (cnt, h) in enumerate(zip(contours, hierarchy)): + parent = h[3] + if parent != -1: + continue + + if not is_valid(cnt): + continue + + if simplify: + epsilon = 0.0005 * cv2.arcLength(cnt, True) + exterior = cv2.approxPolyDP(cnt, epsilon, True).reshape(-1, 2) + else: + exterior = cnt.reshape(-1, 2) + + holes = [] + child_id = h[2] + while child_id != -1: + hole_cnt = contours[child_id] + if is_valid(hole_cnt): + if simplify: + eps_h = 0.005 * cv2.arcLength(hole_cnt, True) + hole = cv2.approxPolyDP(hole_cnt, eps_h, True).reshape(-1, 2) + else: + hole = hole_cnt.reshape(-1, 2) + holes.append(hole) + child_id = hierarchy[child_id][0] + + class_polygons.append({'exterior': exterior, 'holes': holes}) + + polygons[class_id] = class_polygons + + return polygons + + +def pixel_to_geo(coords: np.ndarray, transform) -> List[Tuple[float, float]]: + """픽셀 좌표 배열을 지리 좌표 목록으로 변환한다 (벡터화 처리).""" + xs, ys = coords[:, 0], coords[:, 1] + lons, lats = rasterio.transform.xy(transform, ys, xs, offset='center') + return list(zip(lons, lats)) + + +def save_polygons_to_shapefile(polygons: Dict[int, List[Dict]], + transform, + crs, + output_path: str): + class_thickness_mm = { + 1: 1.0, # Black oil (Emulsion) + 2: 0.1, # Brown oil (Crude) + 3: 0.0003, # Rainbow oil (Slick) + 4: 0.0001 # Silver oil (Slick) + } + + class_notes = { + 1: "Black - Emulsion", + 2: "Brown - Crude", + 3: "Rainbow/Silver - Slick", + 4: "Rainbow/Silver - Slick" + } + + records = [] + + for class_id, class_polys in polygons.items(): + for poly in class_polys: + exterior_coords = np.array(poly['exterior']) + exterior_geo = pixel_to_geo(exterior_coords, transform) + + if exterior_geo[0] != exterior_geo[-1]: + exterior_geo.append(exterior_geo[0]) + + holes_geo = [] + for hole in poly['holes']: + hole_coords = np.array(hole) + hole_geo = pixel_to_geo(hole_coords, transform) + if hole_geo[0] != hole_geo[-1]: + hole_geo.append(hole_geo[0]) + holes_geo.append(hole_geo) + + shape = Polygon(shell=exterior_geo, holes=holes_geo if holes_geo else None) + shape = shape.buffer(0) + + if not shape.is_valid or shape.is_empty: + continue + + area = shape.area + thickness_m = class_thickness_mm.get(class_id, 0) / 1000.0 + volume 
= area * thickness_m + note = class_notes.get(class_id, "Unknown") + + records.append({ + 'geometry': shape, + 'class_id': class_id, + 'area_m2': area, + 'volume_m3': volume, + 'note': note + }) + + if not records: + print("No valid polygons to save for:", output_path) + return + + gdf = gpd.GeoDataFrame(records, crs=crs) + gdf.to_file(output_path) + print(f"Saved shapefile: {output_path}") + + +def _process_mask_entry(filename: str, mask_data: np.ndarray, transform, crs, + output_shp_folder: Path): + """하나의 mask 배열(메모리 또는 디스크 읽기 후)을 폴리곤으로 변환하여 저장한다.""" + shp_output_path = output_shp_folder / (Path(filename).stem + ".shp") + + mask = mask_data + if mask.ndim == 3 and mask.shape[0] in (1, 3, 4): + # (C, H, W) → (H, W, C) + mask = np.transpose(mask, (1, 2, 0)) + if mask.shape[2] > 3: + mask = mask[:, :, :3] + elif mask.ndim == 3 and mask.shape[2] == 1: + mask = np.squeeze(mask) + + polygons = mask_to_polygons(mask, simplify=False, threshold=30) + save_polygons_to_shapefile(polygons, transform, crs, str(shp_output_path)) + + +def run_oilshape(file_id: str, georef_cache: Optional[dict] = None): + """ + file_id 기준 마스크에서 유류 폴리곤을 추출하여 Shapefile로 저장한다. + + Args: + file_id: 처리할 세션 식별자. + georef_cache: run_georeference()의 반환값. 있으면 Mask_Tif 디스크 읽기 생략. + {image_filename: {'mask': ndarray, 'transform': ..., 'crs': ...}} + + 결과: mx15hdi/Polygon/Shp/{file_id}/*.shp + """ + output_shp_folder = _MX15HDI_DIR / 'Polygon' / 'Shp' / file_id + os.makedirs(output_shp_folder, exist_ok=True) + + if georef_cache: + # In-memory 경로: Mask_Tif 디스크 읽기 없이 메모리 배열 사용 + for filename, entry in georef_cache.items(): + mask_data = entry.get('mask') + transform = entry.get('transform') + crs = entry.get('crs') + + if mask_data is None: + print(f"mask 없음, 건너뜀: {filename}") + continue + + _process_mask_entry(filename, mask_data, transform, crs, output_shp_folder) + else: + # 디스크 폴백: Mask_Tif 폴더에서 읽기 + mask_folder = _MX15HDI_DIR / 'Georeference' / 'Mask_Tif' / file_id + + if not mask_folder.exists(): + raise FileNotFoundError(f"마스크 폴더가 존재하지 않습니다: {mask_folder}") + + for filename in os.listdir(mask_folder): + if not filename.endswith(".tif"): + continue + + tif_mask_path = mask_folder / filename + + with rasterio.open(tif_mask_path) as src: + mask_data = src.read() + transform = src.transform + crs = src.crs + + _process_mask_entry(filename, mask_data, transform, crs, output_shp_folder) + + +if __name__ == "__main__": + if len(sys.argv) < 2: + raise ValueError("파라미터가 제공되지 않았습니다. 
폴더 이름을 명령줄 인자로 입력해주세요.") + run_oilshape(sys.argv[1]) diff --git a/prediction/image/optimization.md b/prediction/image/optimization.md new file mode 100644 index 0000000..b10b61f --- /dev/null +++ b/prediction/image/optimization.md @@ -0,0 +1,134 @@ +# run-script API 성능 최적화 기록 + +## 결과 요약 + +| 단계 | 소요 시간 | +|------|----------| +| 최적화 전 | 35.12초 | +| 1차 최적화 후 | ~12초 | +| 2차 최적화 후 | ~8~10초 (예상) | + +--- + +## 1차 최적화 (~35초 → ~12초) + +### 문제 원인 + +| 위치 | 원인 | +|------|------| +| `Inference.py` 모듈 레벨 | `init_segmentor()` 호출 → 요청마다 GPU 모델 재로딩 (15~20초) | +| `api.py` | `subprocess.run()` 으로 `Combine_module.py` 실행 → 매 요청마다 Python 인터프리터 재시작 | +| `Combine_module.py` | Step1~4를 순차 subprocess 4개로 실행 | +| `Georeference.py` 내부 루프 | `np.ones((3,3), np.uint8)` dilate 커널을 루프마다 재생성 | +| `Oilshape.py pixel_to_geo()` | `rasterio.transform.xy()` 를 좌표 1개씩 루프로 호출 | +| `Inference.py` 내부 루프 | `palette_array = np.array(model.PALETTE)` 를 이미지마다 재생성 | + +### 개선 내용 + +**`mx15hdi/Detect/Inference.py`** +- `load_model()` 함수 분리 — 모델 초기화를 서버 시작 시 1회로 분리 +- `run_inference(model, file_id)` 함수화 — 사전 로드된 모델을 인자로 수신 +- `palette_array` 루프 외부로 이동 (이미지마다 재생성 제거) +- `cv2.cvtColor` 제거 → numpy 슬라이싱 `color_mask[:, :, ::-1].copy()` 로 대체 + +**`mx15hdi/Metadata/Scripts/Export_Metadata_mx15hdi.py`** +- 모듈 레벨 `PaddleOCR()` 초기화 제거 → `_get_ocr_engine()` lazy 초기화로 변경 +- `run_metadata_export(file_id)` 함수화 + 절대 경로(`_MX15HDI_DIR`) 기반으로 전환 +- deprecated API 수정: `fillna(method='ffill')` → `ffill()` / `bfill()` + +**`mx15hdi/Georeference/Scripts/Create_Georeferenced_Images_nadir.py`** +- `run_georeference(file_id)` 함수화 + 절대 경로 기반으로 전환 +- `dilate_kernel = np.ones((3,3), np.uint8)` 루프 외부로 이동 (루프마다 재생성 제거) + +**`mx15hdi/Polygon/Scripts/Oilshape.py`** +- `run_oilshape(file_id)` 함수화 + 절대 경로 기반으로 전환 +- `pixel_to_geo()` 벡터화: 좌표 배열 일괄 처리 (`rasterio.transform.xy` 배열 입력) + +**`mx15hdi/Main/Combine_module.py`** +- subprocess 4개 → 직접 함수 호출로 교체 +- `run_pipeline(file_id, model=None)` 함수 추가 + +**`api.py`** +- FastAPI `lifespan` 이벤트로 서버 시작 시 모델 1회 로딩 +- `ThreadPoolExecutor(max_workers=4)` 추가 +- `_run_mx15hdi_pipeline()` 비동기 함수: Step1(추론) + Step2(메타데이터)를 `asyncio.gather`로 병렬 실행 + +--- + +## 2차 최적화 (~12초 → ~8~10초) + +### 문제 원인 + +| 위치 | 원인 | +|------|------| +| `Inference.py` | `cv2.imread(image_path)` 로드 후 `inference_segmentor(model, image_path)` 에 경로 문자열 전달 → mmseg 내부에서 동일 이미지 재읽기 | +| `Inference.py` → `Georeference.py` | blended/mask를 `Detect/result/`, `Detect/Mask_result/` 에 저장 후 georeference에서 다시 읽음 (디스크 왕복 1) | +| `Georeference.py` → `Oilshape.py` | mask를 `Georeference/Mask_Tif/` 에 LZW 압축 GeoTIF로 저장 후 oilshape에서 다시 읽음 (디스크 왕복 2) | +| `Georeference.py` | `scipy.ndimage.binary_dilation` 사용 (Python 구현, OpenCV 대비 느림) | +| `Georeference.py` | rgb와 mask의 빈 픽셀 fill_mask를 중복 계산 | + +### 개선 내용 + +**`mx15hdi/Detect/Inference.py`** +- `inference_segmentor(model, img_bgr)` — 경로 대신 배열 직접 전달 (이중 읽기 제거) +- `write_files: bool = False` 파라미터 추가 — 중간 파일 저장 선택적 처리 +- `inference_cache` dict 반환: `{filename: {'blended': ndarray, 'mask': ndarray, 'ext': str}}` + +**`mx15hdi/Georeference/Scripts/Create_Georeferenced_Images_nadir.py`** +- `inference_cache: dict = None` 파라미터 추가 — 있으면 메모리 배열 사용, 없으면 디스크 폴백 +- `scipy.ndimage.binary_dilation` 제거 → `cv2.dilate` 로 통일 +- rgb/mask 공통 `fill_mask` 재사용 — 중복 계산 제거 +- Mask_Tif GeoTIF 디스크 저장 생략 — `georef_cache` 로 반환 +- `georef_cache` dict 반환: `{filename: {'mask': ndarray, 'transform': ..., 'crs': ...}}` + +**`mx15hdi/Polygon/Scripts/Oilshape.py`** +- `georef_cache: Optional[dict] = None` 파라미터 추가 +- 있으면 메모리 배열 직접 처리, 없으면 `Mask_Tif/` 디스크 폴백 +- 
`_process_mask_entry()` 헬퍼 함수 분리 + +**`api.py`** +- `_run_mx15hdi_pipeline()` 캐시 체이닝: + ```python + inference_cache, _ = await asyncio.gather( + loop.run_in_executor(_executor, run_inference, _model, file_id), + loop.run_in_executor(_executor, run_metadata_export, file_id), + ) + georef_cache = await loop.run_in_executor( + _executor, run_georeference, file_id, inference_cache + ) + await loop.run_in_executor(_executor, run_oilshape, file_id, georef_cache) + ``` + +### 디스크 I/O 흐름 변화 + +**최적화 전:** +``` +Inference → Detect/result/ (write) + Detect/Mask_result/ (write) +Georeference ← Detect/result/ (read) ← 왕복 1 + Detect/Mask_result/ (read) +Georeference → Georeference/Mask_Tif/ (write) +Oilshape ← Georeference/Mask_Tif/ (read) ← 왕복 2 +``` + +**최적화 후:** +``` +Inference → inference_cache (메모리) +Georeference ← inference_cache (메모리) ← 왕복 1 제거 +Georeference → georef_cache (메모리) +Oilshape ← georef_cache (메모리) ← 왕복 2 제거 +Georeference → Georeference/Tif/ (write) ← /get-image/ API 전용, 유지 +``` + +--- + +## 수정 파일 목록 + +| 파일 | 1차 | 2차 | +|------|-----|-----| +| `mx15hdi/Detect/Inference.py` | ✅ | ✅ | +| `mx15hdi/Metadata/Scripts/Export_Metadata_mx15hdi.py` | ✅ | — | +| `mx15hdi/Georeference/Scripts/Create_Georeferenced_Images_nadir.py` | ✅ | ✅ | +| `mx15hdi/Polygon/Scripts/Oilshape.py` | ✅ | ✅ | +| `mx15hdi/Main/Combine_module.py` | ✅ | — | +| `api.py` | ✅ | ✅ | diff --git a/prediction/image/pic_gps.py b/prediction/image/pic_gps.py new file mode 100644 index 0000000..fb8d36c --- /dev/null +++ b/prediction/image/pic_gps.py @@ -0,0 +1,520 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" +pic_gps.py +드론/폰 사진 폴더를 합성(스티칭)해서 한 장으로 저장하는 스크립트. ++ GPS 정보 보존 및 중앙 이미지 찾기 기능 추가 + +✅ 새로운 기능 +- GPS 정보가 있는 이미지들의 중앙 좌표를 계산하여 결과 이미지에 저장 +- 가장 중앙에 위치한 원본 이미지를 찾아 출력 +- --gps-strategy 옵션으로 GPS 저장 방식 선택 (center/first) + +설치: + pip install opencv-contrib-python numpy pillow piexif + +예시: + python pic_gps.py --mode drone --input "./photo/drone" --out "./out/drone.jpg" --enhance + python pic_gps.py --gps-strategy center # GPS 중앙값 사용 + python pic_gps.py --gps-strategy first # 첫 이미지 GPS 사용 +""" + +import argparse +import os +import sys +import glob +from typing import List, Tuple, Optional, Dict +import math + +import cv2 +import numpy as np +from PIL import Image +import piexif + +IMG_EXTS = (".jpg", ".jpeg", ".png", ".tif", ".tiff", ".bmp", ".webp") + + +def list_images(folder: str, prefix: str = "") -> List[str]: + if not os.path.isdir(folder): + return [] + paths: List[str] = [] + + pattern_prefix = f"{prefix}*" if prefix else "*" + + for ext in IMG_EXTS: + paths.extend(glob.glob(os.path.join(folder, f"{pattern_prefix}{ext}"))) + paths.extend(glob.glob(os.path.join(folder, f"{pattern_prefix}{ext.upper()}"))) + return sorted(set(paths)) + + +def load_images(paths: List[str]) -> List[np.ndarray]: + images: List[np.ndarray] = [] + for p in paths: + img = cv2.imread(p, cv2.IMREAD_COLOR) + if img is None: + print(f"[WARN] 이미지 읽기 실패: {p}", file=sys.stderr) + continue + images.append(img) + return images + + +def resize_max_dim(img: np.ndarray, max_dim: int) -> np.ndarray: + if max_dim <= 0: + return img + h, w = img.shape[:2] + m = max(h, w) + if m <= max_dim: + return img + scale = max_dim / float(m) + new_w = int(round(w * scale)) + new_h = int(round(h * scale)) + return cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_AREA) + + +def clahe_contrast(img: np.ndarray) -> np.ndarray: + """대비 보정: 바다/유막처럼 특징점 약할 때 도움""" + lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB) + l, a, b = cv2.split(lab) + clahe = 
cv2.createCLAHE(clipLimit=2.5, tileGridSize=(8, 8)) + l2 = clahe.apply(l) + lab2 = cv2.merge([l2, a, b]) + return cv2.cvtColor(lab2, cv2.COLOR_LAB2BGR) + + +def preprocess(images: List[np.ndarray], max_dim: int, enhance: bool) -> List[np.ndarray]: + out: List[np.ndarray] = [] + for img in images: + x = resize_max_dim(img, max_dim) + if enhance: + x = clahe_contrast(x) + out.append(x) + return out + + +def stitch_with_mode(images: List[np.ndarray], stitch_mode: str) -> Tuple[int, np.ndarray]: + """ + stitch_mode: "PANORAMA" | "SCANS" + returns (status, pano) + """ + if stitch_mode.upper() == "SCANS": + mode = cv2.Stitcher_SCANS + else: + mode = cv2.Stitcher_PANORAMA + + stitcher = cv2.Stitcher_create(mode) + + try: + stitcher.setPanoConfidenceThresh(0.5) + except Exception: + pass + + status, pano = stitcher.stitch(images) + return status, pano + + +def run_stitch(mode: str, images: List[np.ndarray], try_fallback: bool) -> np.ndarray: + """ + mode: "drone" | "phone" + - drone: SCANS 우선 → 실패 시 PANORAMA + - phone: PANORAMA 우선 → 실패 시 SCANS + """ + mode = mode.lower().strip() + if mode not in ("drone", "phone"): + raise ValueError("mode는 'drone' 또는 'phone' 이어야 합니다.") + + primary = "SCANS" if mode == "drone" else "PANORAMA" + secondary = "PANORAMA" if primary == "SCANS" else "SCANS" + + status, pano = stitch_with_mode(images, primary) + if status == cv2.Stitcher_OK: + print(f"[OK] Stitch success with {primary}") + return pano + + print(f"[WARN] Stitch failed with {primary}. status={status}", file=sys.stderr) + + if try_fallback: + status2, pano2 = stitch_with_mode(images, secondary) + if status2 == cv2.Stitcher_OK: + print(f"[OK] Stitch success with fallback {secondary}") + return pano2 + print(f"[ERR] Stitch failed with fallback {secondary}. status={status2}", file=sys.stderr) + + raise RuntimeError( + "스티칭에 실패했습니다.\n" + "가능 원인: 사진 겹침 부족 / 흔들림 / 시점차 과다 / 바다만 가득(특징점 부족)\n" + "해결 팁:\n" + "- 사진 간 30% 이상 겹침\n" + "- 해안선/부두/선박/부표 등 고정 기준물 포함된 사진을 섞기\n" + "- --max-dim 1800 정도로 낮추고 --enhance 켜서 재시도\n" + ) + + +# ========== GPS 관련 함수 ========== + +def dms_to_decimal(dms: tuple, ref: str) -> float: + """ + DMS (Degrees, Minutes, Seconds) 형식을 십진수로 변환 + dms: ((deg_num, deg_den), (min_num, min_den), (sec_num, sec_den)) + ref: 'N', 'S', 'E', 'W' + """ + degrees = dms[0][0] / dms[0][1] + minutes = dms[1][0] / dms[1][1] + seconds = dms[2][0] / dms[2][1] + + decimal = degrees + (minutes / 60.0) + (seconds / 3600.0) + + if ref in ['S', 'W']: + decimal = -decimal + + return decimal + + +def extract_datetime(image_path: str) -> Optional[str]: + """ + 이미지에서 촬영 날짜/시간 추출 + returns: 촬영 날짜 문자열 (YYYY:MM:DD HH:MM:SS) 또는 None + """ + try: + img = Image.open(image_path) + exif_data = img.info.get('exif') + if not exif_data: + return None + + exif_dict = piexif.load(exif_data) + exif_info = exif_dict.get('Exif', {}) + + # DateTimeOriginal (원본 촬영 시간) 우선 + if piexif.ExifIFD.DateTimeOriginal in exif_info: + datetime_bytes = exif_info[piexif.ExifIFD.DateTimeOriginal] + return datetime_bytes.decode('utf-8') + + # DateTime (파일 수정 시간) + if piexif.ExifIFD.DateTime in exif_info: + datetime_bytes = exif_info[piexif.ExifIFD.DateTime] + return datetime_bytes.decode('utf-8') + + # 0th IFD의 DateTime + zeroth_info = exif_dict.get('0th', {}) + if piexif.ImageIFD.DateTime in zeroth_info: + datetime_bytes = zeroth_info[piexif.ImageIFD.DateTime] + return datetime_bytes.decode('utf-8') + + return None + + except Exception as e: + print(f"[WARN] 날짜 추출 실패 ({image_path}): {e}", file=sys.stderr) + return None + + +def extract_gps(image_path: str) 
-> Optional[Tuple[float, float, Optional[float]]]: + """ + 이미지에서 GPS 정보 추출 + returns: (latitude, longitude, altitude) 또는 None + """ + try: + img = Image.open(image_path) + exif_data = img.info.get('exif') + if not exif_data: + return None + + exif_dict = piexif.load(exif_data) + gps_info = exif_dict.get('GPS', {}) + + if not gps_info: + return None + + # 위도 + if piexif.GPSIFD.GPSLatitude in gps_info and piexif.GPSIFD.GPSLatitudeRef in gps_info: + lat = dms_to_decimal( + gps_info[piexif.GPSIFD.GPSLatitude], + gps_info[piexif.GPSIFD.GPSLatitudeRef].decode() + ) + else: + return None + + # 경도 + if piexif.GPSIFD.GPSLongitude in gps_info and piexif.GPSIFD.GPSLongitudeRef in gps_info: + lon = dms_to_decimal( + gps_info[piexif.GPSIFD.GPSLongitude], + gps_info[piexif.GPSIFD.GPSLongitudeRef].decode() + ) + else: + return None + + # 고도 (선택적) + altitude = None + if piexif.GPSIFD.GPSAltitude in gps_info: + alt_data = gps_info[piexif.GPSIFD.GPSAltitude] + altitude = alt_data[0] / alt_data[1] + + return (lat, lon, altitude) + + except Exception as e: + print(f"[WARN] GPS 추출 실패 ({image_path}): {e}", file=sys.stderr) + return None + + +def decimal_to_dms(decimal: float) -> Tuple[tuple, str]: + """ + 십진수 좌표를 DMS 형식으로 변환 + returns: (((deg, 1), (min, 1), (sec, 100)), ref) + """ + is_positive = decimal >= 0 + decimal = abs(decimal) + + degrees = int(decimal) + minutes = int((decimal - degrees) * 60) + seconds = int(((decimal - degrees) * 60 - minutes) * 60 * 100) + + return ((degrees, 1), (minutes, 1), (seconds, 100)) + + +def haversine_distance(lat1: float, lon1: float, lat2: float, lon2: float) -> float: + """ + 두 GPS 좌표 간의 거리 계산 (미터) + Haversine 공식 사용 + """ + R = 6371000 # 지구 반지름 (미터) + + phi1 = math.radians(lat1) + phi2 = math.radians(lat2) + delta_phi = math.radians(lat2 - lat1) + delta_lambda = math.radians(lon2 - lon1) + + a = math.sin(delta_phi / 2) ** 2 + \ + math.cos(phi1) * math.cos(phi2) * math.sin(delta_lambda / 2) ** 2 + c = 2 * math.asin(math.sqrt(a)) + + return R * c + + +def collect_gps_data(paths: List[str]) -> List[Tuple[str, float, float, Optional[float], Optional[str]]]: + """ + 모든 이미지의 GPS 정보와 촬영 날짜 수집 + returns: [(path, lat, lon, alt, datetime), ...] 
+ """ + gps_data = [] + for p in paths: + gps = extract_gps(p) + datetime_str = extract_datetime(p) + if gps: + gps_data.append((p, gps[0], gps[1], gps[2], datetime_str)) + return gps_data + + +def find_center_image(gps_data: List[Tuple[str, float, float, Optional[float], Optional[str]]]) -> Tuple[str, float, float, Optional[str]]: + """ + GPS 좌표의 중심에 가장 가까운 이미지 찾기 + returns: (center_image_path, center_lat, center_lon, datetime) + """ + if not gps_data: + raise ValueError("GPS 정보가 있는 이미지가 없습니다.") + + # 중심 좌표 계산 + center_lat = sum(d[1] for d in gps_data) / len(gps_data) + center_lon = sum(d[2] for d in gps_data) / len(gps_data) + + # 중심에 가장 가까운 이미지 찾기 + min_dist = float('inf') + center_image = gps_data[0][0] + center_datetime = gps_data[0][4] + + for path, lat, lon, _, datetime_str in gps_data: + dist = haversine_distance(center_lat, center_lon, lat, lon) + if dist < min_dist: + min_dist = dist + center_image = path + center_datetime = datetime_str + + print(f"[GPS] 중심 좌표: ({center_lat:.6f}, {center_lon:.6f})") + print(f"[GPS] 중앙 이미지: {os.path.basename(center_image)} (중심으로부터 {min_dist:.1f}m)") + if center_datetime: + print(f"[GPS] 촬영 날짜: {center_datetime}") + + return center_image, center_lat, center_lon, center_datetime + + +def create_gps_exif(lat: float, lon: float, alt: Optional[float] = None, datetime_str: Optional[str] = None) -> bytes: + """ + GPS 좌표와 촬영 날짜로 EXIF 데이터 생성 + datetime_str: "YYYY:MM:DD HH:MM:SS" 형식 + """ + exif_dict = { + "GPS": {}, + "Exif": {}, + "0th": {} + } + + # 위도 + lat_dms = decimal_to_dms(abs(lat)) + exif_dict["GPS"][piexif.GPSIFD.GPSLatitude] = lat_dms + exif_dict["GPS"][piexif.GPSIFD.GPSLatitudeRef] = b'N' if lat >= 0 else b'S' + + # 경도 + lon_dms = decimal_to_dms(abs(lon)) + exif_dict["GPS"][piexif.GPSIFD.GPSLongitude] = lon_dms + exif_dict["GPS"][piexif.GPSIFD.GPSLongitudeRef] = b'E' if lon >= 0 else b'W' + + # 고도 (있는 경우) + if alt is not None: + exif_dict["GPS"][piexif.GPSIFD.GPSAltitude] = (int(alt * 100), 100) + exif_dict["GPS"][piexif.GPSIFD.GPSAltitudeRef] = 0 # 해발 + + # 촬영 날짜/시간 (있는 경우) + if datetime_str: + datetime_bytes = datetime_str.encode('utf-8') + exif_dict["Exif"][piexif.ExifIFD.DateTimeOriginal] = datetime_bytes + exif_dict["Exif"][piexif.ExifIFD.DateTimeDigitized] = datetime_bytes + exif_dict["0th"][piexif.ImageIFD.DateTime] = datetime_bytes + + return piexif.dump(exif_dict) + + +def save_image_with_gps( + pano: np.ndarray, + output_path: str, + lat: float, + lon: float, + alt: Optional[float] = None, + datetime_str: Optional[str] = None +) -> None: + """ + GPS 정보와 촬영 날짜를 포함하여 이미지 저장 + """ + # OpenCV BGR을 PIL RGB로 변환 + pano_rgb = cv2.cvtColor(pano, cv2.COLOR_BGR2RGB) + pil_image = Image.fromarray(pano_rgb) + + # GPS EXIF 생성 + exif_bytes = create_gps_exif(lat, lon, alt, datetime_str) + + # 저장 + pil_image.save(output_path, exif=exif_bytes, quality=95) + print(f"[GPS] GPS 정보 저장 완료: ({lat:.6f}, {lon:.6f})") + if datetime_str: + print(f"[GPS] 촬영 날짜 저장 완료: {datetime_str}") + + +# ========== 기존 함수 ========== + +# def guess_defaults() -> Tuple[str, str]: +# """ +# 인자 없을 때 wing/photo/drone 같은 실제 구조를 자동으로 잡도록 탐색. 
+# """ +# base = os.getcwd() + +# candidates = [ +# (os.path.join(base, "photo", "drone"), "drone"), +# (os.path.join(base, "photo", "phone"), "phone"), +# (os.path.join(base, "photo"), "phone"), +# (os.path.join(base, "photos", "drone"), "drone"), +# (os.path.join(base, "photos", "phone"), "phone"), +# (os.path.join(base, "photos"), "phone"), +# ] + +# for folder, mode in candidates: +# paths = list_images(folder) +# if len(paths) >= 2: +# return mode, folder + +# return "phone", os.path.join(base, "photo") + + +def main(): + ap = argparse.ArgumentParser(description="드론/폰 사진 폴더 합성(스티칭) + GPS 보존") + ap.add_argument("--mode", choices=["drone", "phone"], help="촬영 유형 (drone/phone)") + ap.add_argument("--input", help="이미지 폴더 경로") + ap.add_argument("--out", default="", help="출력 파일 경로 (비우면 자동 생성)") + ap.add_argument("--model", default="", help="합성에 사용될 카메라 모델") + ap.add_argument("--max-dim", type=int, default=2600, help="긴 변 기준 리사이즈(0이면 원본)") + ap.add_argument("--enhance", action="store_true", help="대비 보정(CLAHE)") + ap.add_argument("--try-fallback", action="store_true", help="실패 시 다른 모드로 재시도") + ap.add_argument("--debug", action="store_true", help="디버그 출력(파일 목록 일부 표시)") + ap.add_argument( + "--gps-strategy", + choices=["center", "first", "none"], + default="center", + help="GPS 저장 방식: center(중앙값), first(첫 이미지), none(저장안함)" + ) + args = ap.parse_args() + + # 인자 없을 때도 동작 + # if not args.mode or not args.input: + # g_mode, g_input = guess_defaults() + # args.mode = args.mode or g_mode + # args.input = args.input or g_input + # print(f"[INFO] args missing -> auto defaults: --mode {args.mode} --input {args.input}") + + paths = list_images(args.input, args.model) + if args.debug: + print(f"[DEBUG] input folder: {args.input}") + print(f"[DEBUG] found paths: {len(paths)}") + for p in paths[:10]: + print(f" - {os.path.basename(p)}") + + if len(paths) < 2: + raise RuntimeError(f"합성할 이미지가 2장 이상 필요합니다. 
input={args.input}") + + # GPS 정보 수집 + gps_data = collect_gps_data(paths) + if gps_data: + print(f"[GPS] GPS 정보가 있는 이미지: {len(gps_data)}/{len(paths)}장") + else: + print("[GPS] GPS 정보가 있는 이미지가 없습니다.") + + # 중앙 이미지 찾기 + center_image_path = None + gps_lat, gps_lon, gps_alt, gps_datetime = None, None, None, None + + if gps_data and args.gps_strategy != "none": + if args.gps_strategy == "center": + center_image_path, gps_lat, gps_lon, gps_datetime = find_center_image(gps_data) + # 중앙 이미지의 고도 사용 + for path, lat, lon, alt, dt in gps_data: + if path == center_image_path: + gps_alt = alt + break + elif args.gps_strategy == "first": + # 첫 번째 GPS 있는 이미지 사용 + first_gps = gps_data[0] + center_image_path = first_gps[0] + gps_lat, gps_lon, gps_alt, gps_datetime = first_gps[1], first_gps[2], first_gps[3], first_gps[4] + print(f"[GPS] 첫 번째 이미지 GPS 사용: {os.path.basename(center_image_path)}") + if gps_datetime: + print(f"[GPS] 촬영 날짜: {gps_datetime}") + + # 이미지 로드 및 전처리 + images = load_images(paths) + if len(images) < 2: + raise RuntimeError("읽을 수 있는 이미지가 2장 미만입니다.") + + images = preprocess(images, max_dim=args.max_dim, enhance=args.enhance) + + # 스티칭 + pano = run_stitch(args.mode, images, try_fallback=args.try_fallback) + + # 출력 경로 자동 생성 + if not args.out.strip(): + out_dir = os.path.join(os.getcwd(), "out") + os.makedirs(out_dir, exist_ok=True) + args.out = os.path.join(out_dir, f"{args.mode}_stitched.jpg") + + out_dir = os.path.dirname(os.path.abspath(args.out)) + os.makedirs(out_dir, exist_ok=True) + + # GPS 정보와 함께 저장 + if gps_lat is not None and gps_lon is not None: + save_image_with_gps(pano, args.out, gps_lat, gps_lon, gps_alt, gps_datetime) + else: + # GPS 정보 없으면 일반 저장 + if not cv2.imwrite(args.out, pano): + raise RuntimeError(f"저장 실패: {args.out}") + print(f"[INFO] GPS 정보 없이 저장") + + h, w = pano.shape[:2] + print(f"[DONE] saved: {args.out} (size={w}x{h})") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/prediction/image/project_brief.md b/prediction/image/project_brief.md new file mode 100644 index 0000000..2adcc97 --- /dev/null +++ b/prediction/image/project_brief.md @@ -0,0 +1,395 @@ +# Project Brief — prediction/image + +## 1. 프로젝트 목적 + +항공/드론 카메라로 촬영된 해양 유류 오염 이미지를 자동 분석하여 유류 확산 정보를 추출·반환하는 **이미지 분석 백엔드 서비스**이다. + +- 드론(mx15hdi) 또는 열화상(starsafire) 카메라 이미지를 입력받아 +- AI 세그멘테이션으로 유류 유형(검정/갈색/무지개/은색)을 탐지하고 +- 지리참조(GeoTIFF) 변환 후 유류 면적·부피가 담긴 Shapefile을 생성하며 +- 최종적으로 위치 메타데이터와 유류 분석 결과를 JSON으로 반환한다. + +지원 카메라 타입: `mx15hdi` (EO/나디르 드론), `starsafire` (열화상 카메라) + +--- + +## 2. 전체 처리 흐름 + +``` +클라이언트 + │ + │ POST /run-script/ (camTy, fileId, image 파일) + ▼ +api.py + ├─ GPS EXIF 검증 (check_gps_info) + ├─ 이미지 저장 → {camTy}/Metadata/Image/Original_Images/{fileId}/ + │ + └─ subprocess: Combine_module.py {fileId} + │ + ├── [1] Detect/Inference.py AI 세그멘테이션 + │ └─ 결과: Detect/result/{fileId}/ (블렌딩 이미지) + │ Detect/Mask_result/{fileId}/ (컬러 마스크) + │ + ├── [2] Metadata/Scripts/Export_Metadata_mx15hdi.py + │ └─ EXIF 추출 + 보간 저장 + │ Metadata/CSV/{fileId}/mx15hdi.csv + │ Metadata/CSV/{fileId}/mx15hdi_interpolation.csv + │ + ├── [3] Georeference/Scripts/Create_Georeferenced_Images_nadir.py + │ └─ 핀홀 투영 → GeoTIFF 저장 + │ Georeference/Tif/{fileId}/ (컬러 블렌딩 TIF) + │ Georeference/Mask_Tif/{fileId}/ (마스크 TIF) + │ + └── [4] Polygon/Scripts/Oilshape.py + └─ 마스크 TIF → 폴리곤 추출 → Shapefile 저장 + Polygon/Shp/{fileId}/*.shp + │ + ├─ extract_data.get_metadata() → CSV에서 첫 번째 행 추출 + └─ extract_data.get_oil_type() → Shapefile에서 유류 폴리곤 목록 추출 + │ + ▼ + JSON 응답 { meta: "...", data: [...] } +``` + +--- + +## 3. 
주요 스크립트 설명 + +### 3-1. `api.py` — FastAPI 서버 (포트 5001) + +프로젝트의 진입점. 모든 엔드포인트를 정의하며 파이프라인 실행 및 결과 반환을 담당한다. + +**보조 함수** +| 함수 | 설명 | +|------|------| +| `check_gps_info(image_path)` | 이미지 EXIF에서 GPS IFD 존재 여부 확인 | +| `check_camera_info(image_file)` | 이미지 EXIF에서 카메라 모델명 추출 | + +--- + +### 3-2. `extract_data.py` — 결과 데이터 추출 + +파이프라인 완료 후 CSV·Shapefile에서 결과를 읽어 API 응답에 포함시키는 유틸리티 모듈. + +**`get_metadata(camTy, fileId)`** +- CSV 파일 경로를 카메라 타입에 따라 선택 + - mx15hdi: `{camTy}/Metadata/CSV/{fileId}/mx15hdi_interpolation.csv` + - starsafire: `{camTy}/Metadata/CSV/{fileId}/Metadata_Extracted.csv` +- 헤더 이후 첫 번째 데이터 행을 쉼표로 이어 문자열 반환 + +**`get_oil_type(camTy, fileId)`** +- `{camTy}/Polygon/Shp/{fileId}/*.shp` 에서 첫 번째 Shapefile 로드 +- CRS를 EPSG:4326으로 변환 +- 각 피처에서 `class_id`, `area_m2`, `volume_m3`, `note` 추출 +- 유류 유형별 두께 매핑 적용 (mm → m 변환) + - 1(검정/Emulsion): 1.0mm, 2(갈색/Crude): 0.1mm, 3(무지개): 0.0003mm, 4(은색): 0.0001mm +- 반환값: `[{classId, area, volume, note, thickness, wkt}, ...]` + +--- + +### 3-3. `dbInsert_csv.py` — CSV 메타데이터 DB 저장 + +현재 API에서는 주석 처리되어 있으며 독립 실행(CLI)으로도 사용 가능한 스크립트. + +- CSV에서 촬영 일시 및 대상 위경도(DMS → 십진수 변환)를 파싱 +- `env_safe.unmnd_poll_info` 테이블에 INSERT +- 카메라 타입에 따라 CSV 컬럼 파싱 방식이 다름 + - mx15hdi: `Date1/Date2/Date3`, `Time1/Time2/Time3`, DMS 6필드 + - starsafire: `Date`, `Time`, DMS 4필드(초 없음) + +--- + +### 3-4. `dbInsert_shp.py` — Shapefile 유류 폴리곤 DB 저장 + +현재 API에서는 주석 처리되어 있으며 독립 실행(CLI)으로도 사용 가능한 스크립트. + +- Shapefile에서 유류 폴리곤 피처를 읽어 `env_safe.poll_mat_info` 테이블에 배치 INSERT +- 폴리곤 지오메트리를 `ST_GeomFromText(wkt, 4326)`으로 저장 +- 컬럼: `poll_id`, `algo_ty(유류명)`, `mat_ty(note)`, `mat_area`, `mat_thick`, `mat_vol`, `mat_geom` + +--- + +### 3-5. `pic_gps.py` — 드론/폰 사진 스티칭 + GPS 보존 + +`/stitch` 엔드포인트가 호출하는 이미지 합성 전용 스크립트. CLI 독립 실행도 지원. + +- **스티칭 모드**: drone(SCANS 우선) / phone(PANORAMA 우선), 실패 시 폴백 가능 +- **전처리**: 최대 해상도 리사이즈, CLAHE 대비 보정(`--enhance`) +- **GPS 전략**: center(GPS 중앙값), first(첫 이미지 GPS), none + - Haversine 공식으로 중심 좌표에서 가장 가까운 이미지 선택 +- **출력**: GPS EXIF가 삽입된 JPEG 합성 이미지 + +--- + +### 3-6. `mx15hdi/Main/Combine_module.py` — 파이프라인 오케스트레이터 + +`/run-script/` 엔드포인트가 subprocess로 실행하는 메인 파이프라인 스크립트. + +- 커맨드라인 인자: `fileId` +- 다음 스크립트를 순서대로 실행: + 1. `../Detect/Inference.py` + 2. `../Metadata/Scripts/Export_Metadata_mx15hdi.py` + 3. `../Georeference/Scripts/Create_Georeferenced_Images_nadir.py` + 4. `../Polygon/Scripts/Oilshape.py` + +--- + +### 3-7. `mx15hdi/Detect/Inference.py` — AI 세그멘테이션 추론 + +MMSegmentation 기반 유류 세그멘테이션 모델을 실행하는 스크립트. + +- 모델: `V7_SPECIAL.py` 설정 + `epoch_165.pth` 가중치, `cuda:0` 디바이스 +- 분류 클래스: background, black, brown, rainbow, silver (5클래스) +- 처리 흐름: + 1. `Original_Images/{fileId}/` 내 이미지 파일 열거 + 2. `inference_segmentor()`로 세그멘테이션 맵 생성 + 3. 팔레트 기반 컬러 마스크 생성 (alpha=0.6 블렌딩) + 4. 블렌딩 이미지 → `Detect/result/{fileId}/` + 5. 컬러 마스크 이미지 → `Detect/Mask_result/{fileId}/` + +--- + +### 3-8. `mx15hdi/Metadata/Scripts/Export_Metadata_mx15hdi.py` — 메타데이터 추출 + +이미지의 EXIF 정보를 파싱하여 CSV로 저장하는 스크립트. + +- **`meta_info.extract_and_save_image_metadata()`**: Pillow로 EXIF 읽기 + - 촬영 시각(DateTimeOriginal), GPS(위도/경도/고도) 추출 + - DMS 형식으로 변환하여 CSV 저장 (`mx15hdi.csv`) + - 컬럼: `Filename`, `Tlat_d/m/s`, `Tlon_d/m/s`, `Alat_d/m/s`, `Alon_d/m/s`, `Az`, `El`, `Alt`, `Date1/2/3`, `Time1/2/3` +- **`meta_info.geo_info()`**: OCR(PaddleOCR)로 이미지 HUD에서 메타데이터 추출 (현재 주석 처리) +- **`meta_info.interpolation()`**: 결측값 전방/후방 보간 후 `mx15hdi_interpolation.csv` 저장 + +--- + +### 3-9. `mx15hdi/Georeference/Scripts/Create_Georeferenced_Images_nadir.py` — 지리참조 변환 + +나디르(수직 하향) 촬영 이미지를 GeoTIFF로 변환하는 스크립트. 
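스크립트의 핵심인 핀홀 투영(픽셀 → 지상 좌표 매핑)을 한 픽셀 기준으로 단순화하면 다음과 같다. 아래 함수 형태는 설명용 가정이며, 실제 스크립트는 `np.meshgrid`로 전체 픽셀을 한 번에 벡터 연산한다:

```python
# 설명용 최소 스케치 — 나디르(수직 하향) 가정, 렌즈 왜곡 무시
def pixel_to_ground(u, v, w, h, cam_x, cam_y, alt_m,
                    focal_mm=12.29, sensor_w_mm=17.3, sensor_h_mm=13.0):
    fx = (focal_mm / sensor_w_mm) * w   # 초점거리를 픽셀 단위로 환산
    fy = (focal_mm / sensor_h_mm) * h
    x_n = (u - w / 2.0) / fx            # 주점(이미지 중심) 기준 정규화 좌표
    y_n = (h / 2.0 - v) / fy            # 이미지 y축은 아래 방향이므로 부호 반전
    # 나디르에서는 스케일 팩터가 곧 고도: 지상 좌표 = 카메라 위치 + 정규화 좌표 × 고도
    return cam_x + x_n * alt_m, cam_y + y_n * alt_m
```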
+ +- 카메라 파라미터: Hasselblad L2D-20c 기준 (초점거리 12.29mm, 4/3" 센서) +- 처리 흐름: + 1. `mx15hdi_interpolation.csv`에서 위경도(DMS) 및 고도(feet) 로드 + 2. EPSG:4326 → EPSG:5187(한국 TM 좌표계) 변환 + 3. 핀홀 카메라 투영으로 픽셀 → 지상 좌표 매핑 + 4. GSD(지상 샘플 거리) 계산 및 출력 해상도 결정 + 5. 블렌딩 이미지 → `Georeference/Tif/{fileId}/*_gsd.tif` + 6. 마스크 이미지 → `Georeference/Mask_Tif/{fileId}/*_gsd.tif` +- 결측 픽셀 보완: binary_dilation + cv2.dilate 2회 반복 + +--- + +### 3-10. `mx15hdi/Polygon/Scripts/Oilshape.py` — 유류 폴리곤 생성 + +마스크 GeoTIFF에서 유류 영역을 폴리곤으로 추출하여 Shapefile로 저장하는 스크립트. + +- **`get_class_mask()`**: RGB 마스크를 유클리드 거리 기반으로 클래스 ID 마스크로 변환 +- **`mask_to_polygons()`**: `cv2.findContours(RETR_CCOMP)`로 외곽선 추출, 홀(내부 폴리곤) 처리, `approxPolyDP`로 단순화 +- **`save_polygons_to_shapefile()`**: + - 픽셀 좌표 → 지리 좌표 변환 (`rasterio.transform.xy`) + - Shapely Polygon 생성 + `.buffer(0)`으로 geometry 정규화 + - 면적(m²), 두께(mm→m), 부피(m³), note 계산 후 GeoDataFrame 저장 + - 출력: `Polygon/Shp/{fileId}/*.shp` + +--- + +## 4. API 엔드포인트 상세 + +### `POST /run-script/` + +전체 분석 파이프라인을 실행하고 결과를 반환하는 메인 엔드포인트. + +**입력 (multipart/form-data)** + +| 파라미터 | 타입 | 필수 | 설명 | +|----------|------|------|------| +| `camTy` | string | ✅ | 카메라 타입. `"mx15hdi"` 또는 `"starsafire"` | +| `fileId` | string | ✅ | 분석 세션 식별자 (파일 저장 폴더명으로 사용) | +| `image` | file | ✅ | 분석할 이미지 파일 (PNG/JPG) | + +**처리 흐름** +1. `camTy` 유효성 검사 +2. 이미지를 `{camTy}/Metadata/Image/Original_Images/{fileId}/`에 저장 +3. GPS EXIF 검증 — GPS 없으면 `{"detail": "GPS Infomation Not Found"}` 반환 +4. `Combine_module.py {fileId}` subprocess 실행 (타임아웃 300초) +5. `get_metadata()` + `get_oil_type()` 호출 + +**출력 (200 OK)** +```json +{ + "meta": "filename,lat_d,lat_m,lat_s,...", + "data": [ + { + "classId": "검정", + "area": 152.3, + "volume": 0.1523, + "note": "Black - Emulsion", + "thickness": 0.001, + "wkt": "POLYGON ((...))" + } + ] +} +``` + +**에러 응답** +| 코드 | 조건 | +|------|------| +| 400 | camTy가 유효하지 않은 경우 | +| 404 | Combine_module.py 파일이 없는 경우 | +| 500 | subprocess 타임아웃 또는 기타 오류 | + +--- + +### `GET /get-metadata/{camTy}/{fileId}` + +이미 처리된 결과에서 메타데이터와 유류 정보를 조회하는 엔드포인트 (파이프라인 실행 없음). + +**입력 (Path Parameters)** + +| 파라미터 | 타입 | 설명 | +|----------|------|------| +| `camTy` | string | 카메라 타입 (`mx15hdi` / `starsafire`) | +| `fileId` | string | 분석 세션 식별자 | + +**출력 (200 OK)** + +`/run-script/`와 동일한 응답 구조. +```json +{ + "meta": "filename,lat_d,lat_m,...", + "data": [ { "classId": "갈색", "area": ..., "wkt": "..." } ] +} +``` + +--- + +### `GET /get-original-image/{camTy}/{fileId}` + +저장된 원본 이미지를 Base64 문자열로 반환하는 엔드포인트. + +**입력 (Path Parameters)** + +| 파라미터 | 타입 | 설명 | +|----------|------|------| +| `camTy` | string | 카메라 타입 | +| `fileId` | string | 분석 세션 식별자 | + +**처리**: `{camTy}/Metadata/Image/Original_Images/{fileId}/` 디렉토리에서 `.png` 또는 `.jpg` 파일을 찾아 Base64 인코딩 + +**출력 (200 OK)** +``` +"" +``` + +--- + +### `GET /get-image/{camTy}/{fileId}` + +지리참조된 GeoTIFF를 PNG로 변환하여 좌표 경계(Bounding Box)와 함께 반환하는 엔드포인트. + +**입력 (Path Parameters)** + +| 파라미터 | 타입 | 설명 | +|----------|------|------| +| `camTy` | string | 카메라 타입 | +| `fileId` | string | 분석 세션 식별자 | + +**처리**: +1. `{camTy}/Georeference/Tif/{fileId}/*.tif` 파일 탐색 +2. rasterio로 CRS 및 Bounds 추출 +3. CRS가 EPSG:4326이 아닌 경우 pyproj로 변환 +4. raster 데이터를 PNG로 변환 후 Base64 인코딩 + +**출력 (200 OK)** +```json +{ + "minLon": 126.123456, + "minLat": 34.123456, + "maxLon": 126.234567, + "maxLat": 34.234567, + "image": "" +} +``` + +--- + +### `POST /stitch` + +여러 장의 드론/폰 사진을 스티칭하여 합성 이미지를 반환하는 엔드포인트. 
+ +**입력 (multipart/form-data)** + +| 파라미터 | 타입 | 필수 | 설명 | +|----------|------|------|------| +| `files` | file[] | ✅ | 합성할 이미지 파일 목록 (최소 2장) | +| `fileId` | string | ✅ | 저장 디렉토리 식별자 | + +**처리 흐름** +1. 파일 수 최소 2장 검증 +2. 각 파일의 카메라 모델명 추출 (`check_camera_info`) +3. `stitch/{fileId}/` 디렉토리에 파일 저장 (파일명 형식: `{model}_{idx:03d}_{원본파일명}`) +4. 가장 많이 나온 카메라 모델을 `--model` 인자로 사용 +5. `pic_gps.py --mode drone --input ... --out ... --model ... --enhance` subprocess 실행 (타임아웃 300초) + +**출력 (200 OK)** + +합성된 JPEG 이미지 파일 (`FileResponse`, `image/jpeg`) + +**에러 응답** +| 코드 | 조건 | +|------|------| +| 400 | 이미지가 2장 미만인 경우 | +| 500 | 스티칭 subprocess 실패 | + +--- + +## 5. 디렉토리 구조 + +``` +prediction/image/ +├── api.py FastAPI 서버 (포트 5001) +├── extract_data.py 결과 데이터 추출 유틸리티 +├── dbInsert_csv.py CSV → PostgreSQL (현재 비활성화) +├── dbInsert_shp.py Shapefile → PostgreSQL (현재 비활성화) +├── pic_gps.py 이미지 스티칭 + GPS 보존 +├── mx15hdi/ mx15hdi 카메라 처리 모듈 +│ ├── Main/ +│ │ └── Combine_module.py 파이프라인 오케스트레이터 +│ ├── Detect/ +│ │ ├── Inference.py MMSeg AI 세그멘테이션 +│ │ ├── V7_SPECIAL.py 모델 설정 +│ │ ├── epoch_165.pth 학습된 모델 가중치 +│ │ └── mmsegmentation/ MMSegmentation 라이브러리 +│ ├── Metadata/ +│ │ ├── Image/Original_Images/ 업로드 이미지 저장소 +│ │ ├── CSV/ 메타데이터 CSV 출력 +│ │ └── Scripts/ +│ │ └── Export_Metadata_mx15hdi.py +│ ├── Georeference/ +│ │ ├── Tif/ 컬러 GeoTIFF 출력 +│ │ ├── Mask_Tif/ 마스크 GeoTIFF 출력 +│ │ └── Scripts/ +│ │ └── Create_Georeferenced_Images_nadir.py +│ ├── Polygon/ +│ │ ├── Shp/ Shapefile 출력 +│ │ └── Scripts/ +│ │ └── Oilshape.py +│ └── GSD/ GSD 중간 데이터 +├── starsafire/ starsafire 카메라 처리 모듈 (구조 동일) +└── stitch/ 스티칭 결과 저장소 +``` + +--- + +## 6. 주요 의존성 + +| 라이브러리 | 용도 | +|------------|------| +| fastapi, uvicorn | API 서버 | +| rasterio, pyproj, osgeo(GDAL) | 지리참조·좌표 변환 | +| geopandas, shapely | Shapefile 처리 | +| mmsegmentation, torch | AI 세그멘테이션 | +| paddleocr | HUD OCR (starsafire 전용) | +| opencv-contrib-python | 이미지 처리·스티칭 | +| Pillow, piexif | EXIF 메타데이터 | +| psycopg2 | PostgreSQL 연결 | +| pandas, numpy | 데이터 처리 | diff --git a/prediction/image/requirements.txt b/prediction/image/requirements.txt new file mode 100644 index 0000000..c0f9f97 --- /dev/null +++ b/prediction/image/requirements.txt @@ -0,0 +1,30 @@ +# API 프레임워크 +fastapi==0.111.0 +uvicorn[standard]==0.29.0 + +# 이미지 처리 +numpy==1.26.4 +# opencv-contrib-python-headless: headless(GUI 불필요) + contrib(Stitcher 등) 통합 +opencv-contrib-python-headless==4.9.0.80 +Pillow==10.3.0 +piexif==1.1.3 +scikit-image==0.19.3 +matplotlib==3.5.1 + +# 지리 데이터 처리 +rasterio==1.3.10 +geopandas==0.14.4 +shapely==2.0.4 +pyproj==3.6.1 +# osgeo(GDAL Python 바인딩)는 시스템 GDAL 버전과 맞춰야 하므로 Dockerfile에서 설치 + +# AI/ML — PyTorch는 base 이미지에 포함, mmcv/mmsegmentation은 Dockerfile에서 설치 +# mmcv-full==1.4.3 은 torch/CUDA 버전에 맞는 pre-built 휠이 필요하여 Dockerfile에서 직접 설치 + +# OCR (메타데이터 추출: Export_Metadata_mx15hdi.py) +paddlepaddle==2.6.2 +paddleocr==2.7.0.2 + +# 유틸리티 +pandas==2.2.2 +tqdm==4.66.4 diff --git a/prediction/opendrift/.dockerignore b/prediction/opendrift/.dockerignore new file mode 100644 index 0000000..0ba08e2 --- /dev/null +++ b/prediction/opendrift/.dockerignore @@ -0,0 +1,2 @@ +__pycache__/ +result/ \ No newline at end of file diff --git a/prediction/opendrift/CLAUDE.md b/prediction/opendrift/CLAUDE.md new file mode 100644 index 0000000..d828991 --- /dev/null +++ b/prediction/opendrift/CLAUDE.md @@ -0,0 +1,116 @@ +# CLAUDE.md + +이 파일은 Claude Code (claude.ai/code)가 이 저장소의 코드를 작업할 때 참고할 수 있는 가이드를 제공합니다. + +## 프로젝트 개요 + +이 프로젝트는 OpenDrift 기반의 **기름 유출 모델링 및 예측 시스템**입니다. OpenDrift는 라그랑지안 입자 기반 해양 표류 모델링 프레임워크입니다. 
이 시스템은 기상(GDAPS) 및 해양(MOHID) 예보 데이터를 활용하여 기름 유출 궤적, 풍화 과정(증발, 유화), 환경 영향을 시뮬레이션합니다. + +## API 서버 실행 + +```bash +# 서버 시작 (uvicorn 4 workers, 포트 5003) +./startup.sh + +# 서버 중지 +./shutdown.sh + +# 로그 파일: uvicorn.log +# PID 파일: server.pid +``` + +## API 엔드포인트 + +- `GET /get-received-date` - 최신 예보 수신 가능 날짜 조회 +- `GET /get-uv/{datetime}/{category}` - 바람/해류 시각화 데이터 (category: "wind" 또는 "hydr") +- `GET /get-base64/{datetime}/{img_type}` - Base64 인코딩된 지도 이미지 +- `POST /check-nc` - 특정 시작 시간에 대한 NetCDF 파일 존재 여부 확인 +- `POST /run-model` - 기름 유출 시뮬레이션 실행 + +### run-model 요청 본문 +```json +{ + "startTime": "2025-01-15 12:00:00", + "runTime": 72, + "matTy": "CRUDE OIL", + "matVol": 100.0, + "lon": 126.1, + "lat": 36.6, + "spillTime": 12, + "name": "simulation_id" +} +``` + +## 아키텍처 + +### 핵심 흐름 +1. **api.py** - FastAPI 진입점, 요청 처리 및 OpenOil 시뮬레이션 실행 +2. **createJsonResult.py** - 시뮬레이션 NetCDF 출력 처리, 시계열 데이터 추출(위치, 부피, 풍화 지표), 컨벡스 헐 및 해안 오염 계산 +3. 결과는 시간 단계별 입자 위치, 유류 부피, 환경 조건을 포함한 JSON으로 반환 + +### 주요 모듈 +| 파일 | 용도 | +|------|------| +| `calcCostlineLength.py` | `OilSpillCoastlineAnalyzer` 클래스 - 한국 해안선 shapefile에 KD-tree 공간 인덱싱을 사용하여 오염 해안선 길이 계산 | +| `convex_hull.py` | 입자 위치로부터 WKT 폴리곤 생성 | +| `extractUvFull.py` / `extractUvWithinBox.py` | 시각화를 위한 NetCDF에서 UV 바람/해류 벡터 추출 | +| `createWindJson.py` | 특정 지점의 바람 데이터 추출 | +| `latestForecastDate.py` | 예보 데이터 가용성 확인 | +| `weatherData.py` | 조석, 파고, 기상 데이터용 PostgreSQL/PostGIS 쿼리 | +| `findFile.py` | 폴백 로직이 포함된 시간 기반 파일 탐색기 | + +### 데이터 소스 +- **바람**: `/storage/pos_wind/` 또는 `/storage/wind/` - KMA GDAPS 파일 (`KO108_GDAPS_ATMO_SURF_YYYYMMDDhh.nc`) +- **해양**: `/storage/pos_hydr/` 또는 `/storage/hydr/` - MOHID 해양역학 파일 (`KO108_MOHID_HYDR_SURF_YYYYMMDDhh.nc`) +- **해안선**: `coastline/TN_SHORLINE.shp` (EPSG:5179, EPSG:4326으로 변환) + +### 파일 폴백 로직 +특정 날짜의 데이터 요청 시 파일이 없으면 최대 3일 전까지 순차적으로 확인합니다. 
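+
+아래는 이 폴백 로직의 핵심만 담은 최소 스케치이다. 실제 구현은 `utils.check_nc_files_for_date`와 `findFile.py`에 있고 경로 구성과 검증 방식은 더 복잡할 수 있으며, 함수명 `find_wind_nc`와 날짜 포맷 `%Y%m%d`는 설명을 위한 가정이다.
+
+```python
+from datetime import datetime, timedelta
+from config import STORAGE, SIM, FilePatterns
+
+def find_wind_nc(date_utc: datetime):
+    # 당일부터 FILE_FALLBACK_DAYS(기본 3)일 전까지 하루씩 거슬러 올라가며 확인
+    for back in range(SIM.FILE_FALLBACK_DAYS + 1):
+        d = date_utc - timedelta(days=back)
+        candidate = STORAGE.WIND / FilePatterns.get_wind_filename(d.strftime("%Y%m%d"))
+        if candidate.exists():
+            return candidate
+    return None  # 폴백 범위 내에 파일이 없으면 FILE_NOT_FOUND(5001) 처리 대상
+```
+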
+ +## 주요 의존성 + +- **opendrift** - 핵심 기름 표류 시뮬레이션 엔진 (`opendrift.models.openoil.OpenOil`) +- **xarray** - NetCDF 파일 처리 +- **geopandas, shapely** - GIS 연산 및 지오메트리 +- **scipy.spatial.cKDTree** - 해안선 분석용 공간 인덱싱 +- **psycopg2** - PostgreSQL 데이터베이스 연결 +- **FastAPI/uvicorn** - 웹 API 프레임워크 + +## OpenOil 시뮬레이션 설정 + +```python +o.set_config('processes:evaporation', True) +o.set_config('processes:emulsification', True) +o.set_config('drift:vertical_mixing', True) +o.set_config('vertical_mixing:timestep', 5) +o.set_config('seed:m3_per_hour', matVol) +# 시간 간격: 900초, 출력 간격: 3600초 +``` + +## 오류 코드 + +| 코드 | 의미 | +|------|------| +| 5001 | FILE_NOT_FOUND - NetCDF 예보 파일 없음 | +| 5002 | PARSE_ERROR - JSON 추출 실패 | +| 5003 | MODELING_ERROR - OpenOil 시뮬레이션 실패 | +| 5004 | SYSTEM_ERROR - 일반 예외 | + +## 한국 해역 범위 + +```python +lon_range: (124.21, 129.96) +lat_range: (32.79, 38.96) +``` + +## 동시성 + +- API: uvicorn 4 workers +- 결과 처리: 병렬 시간 단계 계산을 위한 ThreadPoolExecutor 16 workers +- `OilSpillCoastlineAnalyzer`는 스레드 세이프 + +## 참고 사항 + +- 모든 시간은 시뮬레이션 전에 KST(UTC+9)에서 UTC로 변환됨 +- 시뮬레이션 결과는 `result/{name}.nc`에 저장됨 +- 해양 데이터에서 100도 이상의 온도 값은 NaN으로 마스킹됨 diff --git a/prediction/opendrift/api.py b/prediction/opendrift/api.py new file mode 100644 index 0000000..52c76fa --- /dev/null +++ b/prediction/opendrift/api.py @@ -0,0 +1,267 @@ +from fastapi import FastAPI, Request +from fastapi.responses import JSONResponse + +import sys +import asyncio +import uuid +import os +import numpy as np +from concurrent.futures import ThreadPoolExecutor +from enum import Enum +from datetime import datetime, timedelta +from typing import Optional +from opendrift.readers import reader_netCDF_CF_generic +from opendrift.models.openoil import OpenOil + +from config import STORAGE, COORDS, SIM +from logger import get_logger +from utils import check_nc_file_by_date, check_nc_files_for_date, check_img_file_by_date, kst_to_utc +from createJsonResult import extract_and_save_data_to_json as extract_json +from findFile import find_nearest_earlier_file as find_file +from extractUvFull import extract_uv_full +from latestForecastDate import get_earliest_latest_forecast_date + +logger = get_logger("api") +app = FastAPI() + +# ============================================================ +# Workers 포화 관리 (단일 프로세스 기준 — startup.sh: --workers 1) +# ============================================================ +MAX_CONCURRENT = int(os.getenv('MAX_CONCURRENT_JOBS', '4')) +jobs: dict[str, dict] = {} +_thread_pool = ThreadPoolExecutor(max_workers=MAX_CONCURRENT) + +# ============================================================ +# Parcels 선택적 로드 (없어도 동작) +# ============================================================ +try: + sys.path.insert(0, str(STORAGE.PARCELS_PATH)) + from parcels_api import router as parcels_router # type: ignore + app.include_router(parcels_router) + logger.info("Parcels router 로드 완료") +except Exception as _e: + logger.warning(f"Parcels router 로드 건너뜀 (정상): {_e}") + + +class CustomErrorCode(Enum): + FILE_NOT_FOUND = 5001 + PARSE_ERROR = 5002 + MODELING_ERROR = 5003 + SYSTEM_ERROR = 5004 + + +def _parse_datetime(dt_str: Optional[str]) -> Optional[datetime]: + """다양한 형식의 날짜 문자열을 datetime으로 변환 (KST 기준)""" + if not dt_str: + return None + formats = [ + "%Y-%m-%dT%H:%M:%S.%fZ", + "%Y-%m-%dT%H:%M:%SZ", + "%Y-%m-%dT%H:%M:%S", + "%Y-%m-%d %H:%M:%S", + "%Y%m%d%H", + ] + for fmt in formats: + try: + return datetime.strptime(dt_str, fmt) + except ValueError: + continue + return None + + +# ============================================================ +# 기존 API 
엔드포인트 (변경 없음) +# ============================================================ + +@app.get("/get-received-date") +async def get_received_date(): + """예보 수신일 및 가능일 확인""" + result = get_earliest_latest_forecast_date() + if result: + return JSONResponse(content=result, status_code=200) + return JSONResponse(content={ + "error_code": CustomErrorCode.FILE_NOT_FOUND.value, + "message": "File not found error" + }, status_code=200) + + +@app.get("/get-uv/{datetime_str}/{category}") +async def get_uv(datetime_str: str, category: str): + """바람, 해수 시각화용 데이터 리턴""" + date_obj = kst_to_utc(datetime.strptime(datetime_str, "%Y%m%d%H")) + if category == "wind": + nc_path, date = check_nc_file_by_date(str(STORAGE.WIND), date_obj) + else: + nc_path, date = check_nc_file_by_date(str(STORAGE.HYDR), date_obj) + result = extract_uv_full( + nc_path, + date_obj.strftime("%Y-%m-%d %H:%M:%S"), + category, + skip=1, + lon_range=COORDS.lon_range, + lat_range=COORDS.lat_range + ) + return JSONResponse(content={"result": result}, status_code=200) + + +# ============================================================ +# NC 파일 확인 (수정: 404 반환으로 Node.js !checkRes.ok 연동) +# ============================================================ + +@app.post("/check-nc") +async def check_nc(request: Request): + """기상 데이터 존재 여부 확인. startTime(KST) 기준으로 NC 파일 조회.""" + body = await request.json() + start_time_str = body.get('startTime') or body.get('start_time') + + try: + date_obj = _parse_datetime(start_time_str) + if date_obj is None: + date_obj = datetime.now() + + date_utc = kst_to_utc(date_obj) + wind_nc_path, ocean_nc_path, _, _ = check_nc_files_for_date(date_utc) + + if not wind_nc_path or not ocean_nc_path: + return JSONResponse(content={"message": "not exist"}, status_code=404) + + return JSONResponse(content={"message": "exist"}, status_code=200) + + except Exception: + logger.exception("Error checking NC files") + return JSONResponse(content={ + "error_code": CustomErrorCode.SYSTEM_ERROR.value, + "message": "System Error" + }, status_code=500) + + +# ============================================================ +# 비동기 시뮬레이션 실행 (Workers 포화 제어) +# ============================================================ + +@app.post("/run-model") +async def run_model(request: Request): + """기름 유출 시뮬레이션 비동기 실행. job_id를 즉시 반환하고 백그라운드에서 처리.""" + running = sum(1 for j in jobs.values() if j['status'] == 'RUNNING') + if running >= MAX_CONCURRENT: + return JSONResponse(status_code=503, content={ + 'success': False, + 'error': '분석 서버가 사용 중입니다. 
잠시 후 재시도해 주세요.', + 'running': running, + 'max': MAX_CONCURRENT, + }) + + body = await request.json() + job_id = str(uuid.uuid4()) + jobs[job_id] = {'status': 'RUNNING', 'result': None, 'error': None} + asyncio.create_task(_run_simulation(job_id, body)) + + return JSONResponse(content={'success': True, 'job_id': job_id, 'status': 'RUNNING'}, status_code=200) + + +@app.get("/status/{job_id}") +async def get_job_status(job_id: str): + """시뮬레이션 작업 상태 조회""" + if job_id not in jobs: + return JSONResponse(content={'error': 'Job not found'}, status_code=404) + + job = jobs[job_id] + if job['status'] == 'DONE': + return JSONResponse(content={'status': 'DONE', 'result': job['result']}) + if job['status'] == 'ERROR': + return JSONResponse(content={'status': 'ERROR', 'error': job['error']}) + return JSONResponse(content={'status': 'RUNNING'}) + + +async def _run_simulation(job_id: str, body: dict) -> None: + """시뮬레이션을 ThreadPoolExecutor에서 실행하고 결과를 jobs 딕셔너리에 저장""" + loop = asyncio.get_event_loop() + try: + result = await loop.run_in_executor(_thread_pool, _simulate_sync, body, job_id) + jobs[job_id] = {'status': 'DONE', 'result': result, 'error': None} + except Exception as e: + logger.exception(f"시뮬레이션 오류 (job_id={job_id})") + jobs[job_id] = {'status': 'ERROR', 'result': None, 'error': str(e)} + + +def _simulate_sync(body: dict, job_id: str): + """동기 시뮬레이션 로직 (ThreadPoolExecutor에서 실행)""" + start_time_str = body.get('startTime') or body.get('start_time') + run_time = body.get('runTime') or body.get('run_time') + mat_ty = body.get('matTy') or body.get('mat_ty') + mat_vol = body.get('matVol') or body.get('mat_vol') + lon = body.get('lon') + lat = body.get('lat') + spill_time = body.get('spillTime') or body.get('spill_time') + name = body.get('name') or job_id # name 없으면 job_id 사용 + + start_time_measure = datetime.now() + o = OpenOil(loglevel=20) + + date_obj = _parse_datetime(start_time_str) or datetime.now() + date_utc = kst_to_utc(date_obj) + + wind_nc_path, ocean_nc_path, _, _ = check_nc_files_for_date(date_utc) + if not wind_nc_path: + raise FileNotFoundError("바람 NC 파일을 찾을 수 없습니다.") + if not ocean_nc_path: + raise FileNotFoundError("해양 NC 파일을 찾을 수 없습니다.") + + logger.info(f"[job:{job_id}] wind_nc_path: {wind_nc_path}") + logger.info(f"[job:{job_id}] ocean_nc_path: {ocean_nc_path}") + + reader_wind = reader_netCDF_CF_generic.Reader( + wind_nc_path, + standard_name_mapping={'x_wind': 'x_wind', 'y_wind': 'y_wind'} + ) + reader_ocean = reader_netCDF_CF_generic.Reader(ocean_nc_path) + if 'temperature' in reader_ocean.Dataset.variables: + temp = reader_ocean.Dataset['temperature'] + temp_values = temp.values + mask = temp_values > SIM.TEMPERATURE_THRESHOLD + temp_values[mask] = np.nan + reader_ocean.Dataset['temperature'].values = temp_values + + o.add_reader([reader_ocean, reader_wind]) + + o.set_config('processes:evaporation', True) + o.set_config('processes:emulsification', True) + o.set_config('drift:vertical_mixing', True) + o.set_config('vertical_mixing:timestep', SIM.VERTICAL_MIXING_TIMESTEP) + o.set_config('seed:m3_per_hour', mat_vol) + + if spill_time == 0 or spill_time is None: + o.seed_elements(lon=lon, lat=lat, number=100, + time=date_utc, z=0, oil_type=mat_ty) + else: + release_duration = timedelta(hours=spill_time) + end_t = date_utc + release_duration + o.seed_elements(lon=lon, lat=lat, number=100, + time=[date_utc, end_t], z=0, oil_type=mat_ty) + + ncfile = f"{STORAGE.RESULT}/{name}.nc" + try: + o.run(duration=timedelta(hours=run_time), time_step=900, time_step_output=3600, outfile=ncfile) 
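+        # time_step=900초 간격으로 적분하고, time_step_output=3600초 간격으로 결과를 기록한다 (CLAUDE.md의 시뮬레이션 설정과 동일)
+        # outfile(result/{name}.nc)은 바로 아래에서 extract_json()이 읽어 시계열 JSON으로 변환한다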
+ except Exception as e: + logger.error(f"[job:{job_id}] 시뮬레이션 실행 오류: {e}") + raise + + json_data = extract_json(ncfile, wind_nc_path, ocean_nc_path, name, lon, lat) + if not json_data: + raise ValueError("시뮬레이션 결과 변환 실패") + + elapsed = (datetime.now() - start_time_measure).total_seconds() + logger.info(f"[job:{job_id}] 완료: {int(elapsed//60)}m {int(elapsed%60)}s") + return json_data + +if __name__ == "__main__": + import uvicorn + + # 서버 설정 (호스트와 포트는 필요에 따라 수정하세요) + # log_level="info"를 통해 FastAPI와 uvicorn의 로그를 확인할 수 있습니다. + uvicorn.run( + "api:app", + host="0.0.0.0", + port=5003, + reload=True # 코드 변경 시 자동으로 서버를 재시작하는 모드 (개발용) + ) \ No newline at end of file diff --git a/prediction/opendrift/calcCostlineLength.py b/prediction/opendrift/calcCostlineLength.py new file mode 100644 index 0000000..5320f04 --- /dev/null +++ b/prediction/opendrift/calcCostlineLength.py @@ -0,0 +1,252 @@ +""" +calcCostlineLength.py + +기름 유출로 오염된 해안선 길이를 계산하는 모듈 +Thread-safe하며 다른 스크립트에서 import하여 사용 가능 + +사용 예시: + from calcCostlineLength import OilSpillCoastlineAnalyzer + + analyzer = OilSpillCoastlineAnalyzer("coastline.shp") + length, info = analyzer.calculate_polluted_length(particles) +""" + +import geopandas as gpd +import numpy as np +from scipy.spatial import cKDTree +from typing import List, Dict, Tuple, Optional +from concurrent.futures import ThreadPoolExecutor, as_completed +import os + +from logger import get_logger +from utils import haversine_distance + +logger = get_logger("calcCostlineLength") + + +class OilSpillCoastlineAnalyzer: + """ + 기름 유출로 오염된 해안선 길이를 계산하는 클래스 (Thread-Safe) + + Attributes: + coastline_gdf: 해안선 GeoDataFrame + buffer_distance: 입자 매칭 버퍼 거리 (도 단위) + coastline_points: 해안선 점들의 NumPy 배열 + kdtree: 공간 검색을 위한 KD-Tree + segment_info: 세그먼트 정보 튜플 + segment_lengths: 세그먼트 길이 배열 (미터) + """ + + def __init__(self, coastline_shp_path: str, buffer_distance: float = 0.001, + simplify_tolerance: float = 0.001, + bbox: Optional[Tuple[float, float, float, float]] = None, + center_point: Optional[Tuple[float, float]] = None, + radius: Optional[float] = None): + + if not os.path.exists(coastline_shp_path): + raise FileNotFoundError(f"Coastline file not found: {coastline_shp_path}") + + self.coastline_gdf = gpd.read_file(coastline_shp_path) + + if self.coastline_gdf.crs and self.coastline_gdf.crs != 'EPSG:4326': + self.coastline_gdf = self.coastline_gdf.to_crs('EPSG:4326') + logger.info(f"Original coastline features: {len(self.coastline_gdf):,}") + + if bbox is not None: + self._filter_by_bbox(bbox) + elif center_point is not None and radius is not None: + self._filter_by_center(center_point, radius) + + self.buffer_distance = buffer_distance + self.simplify_tolerance = simplify_tolerance + self._build_spatial_index() + + def _filter_by_bbox(self, bbox: Tuple[float, float, float, float]): + """경계 상자로 해안선 필터링""" + minx, miny, maxx, maxy = bbox + + bounds = self.coastline_gdf.bounds + mask = ( + (bounds['minx'] <= maxx) & (bounds['maxx'] >= minx) & + (bounds['miny'] <= maxy) & (bounds['maxy'] >= miny) + ) + + self.coastline_gdf = self.coastline_gdf[mask].copy() + logger.info(f"Filtered features: {len(self.coastline_gdf):,} " + f"({len(self.coastline_gdf) / len(mask) * 100:.1f}% retained)") + + def _filter_by_center(self, center_point: Tuple[float, float], radius: float): + """중심점과 반경으로 해안선 필터링""" + lon, lat = center_point + bbox = (lon - radius, lat - radius, lon + radius, lat + radius) + self._filter_by_bbox(bbox) + + def _build_spatial_index(self): + """해안선의 공간 인덱스 구축 (KD-Tree 사용)""" + + if 
len(self.coastline_gdf) == 0: + logger.warning("No coastline after filtering!") + self.coastline_points = np.array([]).reshape(0, 2) + self.segment_info = tuple() + self.segment_lengths = {} + self.kdtree = None + return + + coastline_points = [] + segment_info = [] + segment_lengths = {} + + if self.simplify_tolerance > 0: + self.coastline_gdf.geometry = self.coastline_gdf.geometry.simplify( + self.simplify_tolerance, preserve_topology=False + ) + + for idx, geom in enumerate(self.coastline_gdf.geometry): + if geom.is_empty: + continue + + if geom.geom_type == 'LineString': + coords = np.array(geom.coords) + for i in range(len(coords) - 1): + p1 = coords[i] + p2 = coords[i + 1] + + seg_key = (idx, i) + + if seg_key not in segment_lengths: + length_m = haversine_distance(p1[0], p1[1], p2[0], p2[1], return_km=False) + segment_lengths[seg_key] = length_m + + coastline_points.append(p1) + segment_info.append(seg_key) + + coastline_points.append(p2) + segment_info.append(seg_key) + + elif geom.geom_type == 'MultiLineString': + for line_idx, line in enumerate(geom.geoms): + if line.is_empty: + continue + coords = np.array(line.coords) + for i in range(len(coords) - 1): + p1 = coords[i] + p2 = coords[i + 1] + + seg_key = (idx, i, line_idx) + + if seg_key not in segment_lengths: + length_m = haversine_distance(p1[0], p1[1], p2[0], p2[1], return_km=False) + segment_lengths[seg_key] = length_m + + coastline_points.append(p1) + segment_info.append(seg_key) + + coastline_points.append(p2) + segment_info.append(seg_key) + + self.coastline_points = np.array(coastline_points) + self.segment_info = tuple(segment_info) + self.segment_lengths = segment_lengths + self.kdtree = cKDTree(self.coastline_points) + + def calculate_polluted_length(self, particles: List[Dict]) -> Tuple[float, Dict]: + """ + 오염된 해안선 길이 계산 (완전 Thread-Safe) + + Args: + particles: 입자 정보 리스트 + 각 입자는 {"lon": float, "lat": float, "stranded": int} 형태 + + Returns: + tuple: (오염된 해안선 총 길이(m), 상세 정보 dict) + """ + stranded_particles = [p for p in particles if p.get('stranded', 0) == 1] + + if not stranded_particles: + return 0.0, { + "polluted_segments": 0, + "total_particles": len(particles), + "stranded_particles": 0, + "affected_particles_in_buffer": 0 + } + + if self.kdtree is None or len(self.coastline_points) == 0: + return 0.0, { + "polluted_segments": 0, + "total_particles": len(particles), + "stranded_particles": len(stranded_particles), + "affected_particles_in_buffer": 0 + } + + particle_coords = np.array([[p['lon'], p['lat']] for p in stranded_particles]) + + distances, indices = self.kdtree.query(particle_coords, k=1) + + valid_mask = distances < self.buffer_distance + valid_indices = indices[valid_mask] + + if len(valid_indices) == 0: + return 0.0, { + "polluted_segments": 0, + "total_particles": len(particles), + "stranded_particles": len(stranded_particles), + "affected_particles_in_buffer": 0 + } + + polluted_segments = set() + for idx in valid_indices: + seg_info = self.segment_info[idx] + polluted_segments.add(seg_info) + + total_length = sum(self.segment_lengths[seg] for seg in polluted_segments) + + detail_info = { + "polluted_segments": len(polluted_segments), + "total_particles": len(particles), + "stranded_particles": len(stranded_particles), + "affected_particles_in_buffer": int(valid_mask.sum()) + } + + return total_length, detail_info + + def calculate_polluted_length_batch(self, + particle_batches: List[List[Dict]], + max_workers: Optional[int] = None) -> List[Tuple[float, Dict]]: + """여러 입자 배치를 병렬로 처리""" + results = 
[] + with ThreadPoolExecutor(max_workers=max_workers) as executor: + futures = {executor.submit(self.calculate_polluted_length, batch): i + for i, batch in enumerate(particle_batches)} + + for future in as_completed(futures): + results.append(future.result()) + + return results + + def get_info(self) -> Dict: + """분석기의 정보 반환""" + return { + "buffer_distance": self.buffer_distance, + "total_coastline_segments": len(set(self.segment_info)), + "total_coastline_points": len(self.coastline_points), + "coastline_features": len(self.coastline_gdf) + } + + +def create_analyzer(coastline_shp_path: str, + buffer_distance: float = 0.001, + simplify_tolerance: float = 0.001, + bbox: Optional[Tuple[float, float, float, float]] = None, + center_point: Optional[Tuple[float, float]] = None, + radius: Optional[float] = None) -> OilSpillCoastlineAnalyzer: + """분석기 인스턴스 생성 (편의 함수)""" + return OilSpillCoastlineAnalyzer(coastline_shp_path, buffer_distance, + simplify_tolerance, bbox, center_point, radius) + + +def calculate_single(coastline_shp_path: str, + particles: List[Dict], + buffer_distance: float = 0.001) -> Tuple[float, Dict]: + """한 번만 계산하는 경우 사용하는 편의 함수""" + analyzer = OilSpillCoastlineAnalyzer(coastline_shp_path, buffer_distance) + return analyzer.calculate_polluted_length(particles) diff --git a/prediction/opendrift/coastline/TN_SHORLINE.cpg b/prediction/opendrift/coastline/TN_SHORLINE.cpg new file mode 100644 index 0000000..12575e0 --- /dev/null +++ b/prediction/opendrift/coastline/TN_SHORLINE.cpg @@ -0,0 +1 @@ +CP949 \ No newline at end of file diff --git a/prediction/opendrift/coastline/TN_SHORLINE.dbf b/prediction/opendrift/coastline/TN_SHORLINE.dbf new file mode 100644 index 0000000..0d048f2 Binary files /dev/null and b/prediction/opendrift/coastline/TN_SHORLINE.dbf differ diff --git a/prediction/opendrift/coastline/TN_SHORLINE.prj b/prediction/opendrift/coastline/TN_SHORLINE.prj new file mode 100644 index 0000000..5844995 --- /dev/null +++ b/prediction/opendrift/coastline/TN_SHORLINE.prj @@ -0,0 +1 @@ +PROJCS["Korea_2000_Korea_Unified_Coordinate_System",GEOGCS["GCS_Korea_2000",DATUM["D_Korea_2000",SPHEROID["GRS_1980",6378137.0,298.257222101]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["False_Easting",1000000.0],PARAMETER["False_Northing",2000000.0],PARAMETER["Central_Meridian",127.5],PARAMETER["Scale_Factor",0.9996],PARAMETER["Latitude_Of_Origin",38.0],UNIT["Meter",1.0]] \ No newline at end of file diff --git a/prediction/opendrift/coastline/TN_SHORLINE.shp b/prediction/opendrift/coastline/TN_SHORLINE.shp new file mode 100644 index 0000000..71b7dff Binary files /dev/null and b/prediction/opendrift/coastline/TN_SHORLINE.shp differ diff --git a/prediction/opendrift/coastline/TN_SHORLINE.shx b/prediction/opendrift/coastline/TN_SHORLINE.shx new file mode 100644 index 0000000..220d61d Binary files /dev/null and b/prediction/opendrift/coastline/TN_SHORLINE.shx differ diff --git a/prediction/opendrift/config.py b/prediction/opendrift/config.py new file mode 100644 index 0000000..1cd9ec2 --- /dev/null +++ b/prediction/opendrift/config.py @@ -0,0 +1,101 @@ +""" +config.py + +중앙집중화된 설정 모듈 +모든 경로, 좌표 범위, 시뮬레이션 상수를 한 곳에서 관리합니다. 
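+
+사용 예시 (모듈 하단에 정의된 싱글톤 인스턴스를 import하여 사용):
+    from config import STORAGE, COORDS, SIM
+    nc_dir = STORAGE.WIND              # {STORAGE_BASE}/wind
+    lon_min, lon_max = COORDS.lon_range
+    fallback_days = SIM.FILE_FALLBACK_DAYS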
+""" + +import os +from pathlib import Path +from dataclasses import dataclass +from typing import Tuple + +_BASE_STR = os.getenv("STORAGE_BASE", "C:/upload") + +@dataclass(frozen=True) +class StoragePaths: + """파일 저장 경로 설정""" + BASE: Path = Path(_BASE_STR) + WIND: Path = Path(_BASE_STR) / "wind" + POS_WIND: Path = Path(_BASE_STR) / "pos_wind" + HYDR: Path = Path(_BASE_STR) / "hydr" + POS_HYDR: Path = Path(_BASE_STR) / "pos_hydr" + RESULT: Path = Path("result") + COASTLINE: Path = Path("coastline/TN_SHORLINE.shp") + PARCELS_PATH: str = "/home/gcrnd/apps/parcels" + + +@dataclass(frozen=True) +class CoordinateBounds: + """한국 해역 좌표 범위""" + LON_MIN: float = 124.2083983267507250 + LON_MAX: float = 129.9583954964914767 + LAT_MIN: float = 32.7916655404227129 + LAT_MAX: float = 38.9583268301827559 + + @property + def lon_range(self) -> Tuple[float, float]: + return (self.LON_MIN, self.LON_MAX) + + @property + def lat_range(self) -> Tuple[float, float]: + return (self.LAT_MIN, self.LAT_MAX) + + +@dataclass(frozen=True) +class SimulationConstants: + """시뮬레이션 관련 상수""" + TEMPERATURE_THRESHOLD: float = 100.0 # 온도 이상값 필터링 임계값 + POLLUTION_GRID_BINS: int = 200 # 오염 면적 계산용 격자 해상도 + KM_PER_DEG_LAT: float = 111.0 # 위도 1도당 km + KM_PER_DEG_LON: float = 91.0 # 경도 1도당 km (위도 35도 기준) + EARTH_RADIUS_M: float = 6371000.0 # 지구 반경 (미터) + EARTH_RADIUS_KM: float = 6371.0 # 지구 반경 (km) + VERTICAL_MIXING_TIMESTEP: int = 5 # 수직 혼합 타임스텝 + FILE_FALLBACK_DAYS: int = 3 # 파일 폴백 시도 일수 + TIMEZONE_OFFSET_HOURS: int = 9 # KST-UTC 시간차 + + +@dataclass(frozen=True) +class WindJsonConfig: + """바람 데이터 추출 설정""" + RANGE_KM: float = 24.0 # 중심점으로부터의 추출 범위 (km) + GRID_SPACING_KM: float = 4.0 # 격자 간격 (km) + + +@dataclass(frozen=True) +class CoastlineAnalyzerConfig: + """해안선 분석 설정""" + BUFFER_DISTANCE: float = 0.001 # 입자 매칭 버퍼 거리 (도) + SIMPLIFY_TOLERANCE: float = 0.0 # 해안선 단순화 허용오차 + DEFAULT_RADIUS: float = 0.2 # 기본 검색 반경 (도, ~50km) + + +@dataclass(frozen=True) +class ThreadPoolConfig: + """스레드 풀 설정""" + MAX_WORKERS: int = 16 # 최대 워커 수 + + +# 싱글톤 인스턴스 +STORAGE = StoragePaths() +COORDS = CoordinateBounds() +SIM = SimulationConstants() +WIND_JSON = WindJsonConfig() +COASTLINE = CoastlineAnalyzerConfig() +THREAD_POOL = ThreadPoolConfig() + + +# 파일 패턴 템플릿 +class FilePatterns: + """NC 파일명 패턴""" + WIND_FILE = "KO108_GDAPS_ATMO_SURF_{date}00.nc" + HYDR_FILE = "KO108_MOHID_HYDR_SURF_{date}00.nc" + + @staticmethod + def get_wind_filename(date_str: str) -> str: + return FilePatterns.WIND_FILE.format(date=date_str) + + @staticmethod + def get_hydr_filename(date_str: str) -> str: + return FilePatterns.HYDR_FILE.format(date=date_str) diff --git a/prediction/opendrift/convex_hull.py b/prediction/opendrift/convex_hull.py new file mode 100644 index 0000000..c327db3 --- /dev/null +++ b/prediction/opendrift/convex_hull.py @@ -0,0 +1,34 @@ +import json +from shapely.geometry import MultiPoint, Point + +def get_convex_hull_from_json(json_data): + """ + JSON 형식의 위경도 데이터로 Convex Hull을 계산합니다. 
+
+    :param json_data: 위경도 데이터가 포함된 JSON 리스트
+    :return: Convex Hull의 좌표 리스트 (폴리곤 또는 포인트)
+    """
+    # JSON 데이터를 파싱하여 [longitude, latitude] 형태로 변환
+    points = [(point["lon"], point["lat"]) for point in json_data]
+
+    # 중복 제거
+    unique_points = list(set(points))
+
+    if len(unique_points) < 3:
+        if len(unique_points) == 1:
+            return unique_points  # 단일 포인트 반환
+        elif len(unique_points) == 2:
+            return unique_points + [unique_points[0]]  # 두 점은 선분으로 처리
+        else:
+            raise ValueError("Convex Hull을 계산하려면 최소 3개의 고유한 포인트가 필요합니다.")
+
+    # MultiPoint로 Convex Hull 계산
+    multi_point = MultiPoint(unique_points)
+    hull = multi_point.convex_hull
+
+    # 결과가 폴리곤일 경우 좌표 리스트 반환, 단일 포인트일 경우 포인트 반환
+    if isinstance(hull, Point):
+        return [list(hull.coords)[0]]
+    else:
+        # 폴리곤의 외곽 좌표 (폐쇄된 형태로 첫 포인트 반복)
+        return list(hull.exterior.coords)
\ No newline at end of file
diff --git a/prediction/opendrift/createImage.py b/prediction/opendrift/createImage.py
new file mode 100644
index 0000000..cf02f69
--- /dev/null
+++ b/prediction/opendrift/createImage.py
@@ -0,0 +1,103 @@
+from PIL import Image
+import base64
+from io import BytesIO
+
+def crop_and_encode_geographic_image(
+    image_path: str,
+    center_point: tuple[float, float]  # (center_lon, center_lat)
+) -> "tuple[str, dict] | str":
+    """
+    지정된 PNG 이미지에서 특정 위경도 중심을 기준으로
+    주변 25km 영역을 잘라내고 Base64 문자열로 인코딩합니다.
+    image_bounds와 crop_radius_km는 파라미터가 아니라 함수 내부 고정값입니다.
+
+    :param image_path: 입력 PNG 파일 경로.
+    :param center_point: 자르기 영역의 중심이 될 (lon, lat) 좌표.
+    :return: (잘린 이미지의 PNG Base64 문자열, 잘린 영역의 위경도 bounds dict) 튜플.
+             이미지 로드 실패 등 오류 시에는 오류 메시지 문자열을 반환합니다.
+    """
+    # 한국 해역 전체 범위 (경도 최소, 위도 최소, 경도 최대, 위도 최대) / config.CoordinateBounds와 동일한 값
+    image_bounds = (124.2083983267507250, 32.7916655404227129, 129.9583954964914767, 38.9583268301827559)
+    crop_radius_km = 25.0
+
+    # 1. 이미지 로드
+    try:
+        img = Image.open(image_path)
+    except FileNotFoundError:
+        return f"Error: File not found at {image_path}"
+    except Exception as e:
+        return f"Error opening image: {e}"
+
+    width, height = img.size
+    min_lon, min_lat, max_lon, max_lat = image_bounds
+    center_lon, center_lat = center_point
+
+    # 2. 위경도 경계 계산 (25km 반경)
+
+    # 1도당 근사적인 거리 (대한민국 지역 기준)
+    # 위도 1도: 약 111 km (거의 일정)
+    # 경도 1도: 위도에 따라 달라지지만, 한국의 위도(약 33~38도)에서 약 88~93 km 정도.
+    # 안전을 위해 WGS84 타원체 기준 위도 35도에서 경도 1도당 약 91.2km 가정
+    # 더 정확한 계산을 위해선 `pyproj` 등의 라이브러리 사용이 권장되나, 여기선 근사치 사용
+
+    KM_PER_DEG_LAT = 111.0  # 위도 1도당 km (근사치)
+    KM_PER_DEG_LON_AT_35 = 91.2  # 위도 35도에서 경도 1도당 km (근사치)
+
+    # 이미지 전체가 커버하는 위도/경도 범위 (도 단위)
+    deg_lat_span = max_lat - min_lat
+    deg_lon_span = max_lon - min_lon
+
+    # KM_PER_DEG_LON을 중심 위도에 맞게 조정 (단순화를 위해 상수 사용을 유지)
+
+    # 25km에 해당하는 위도/경도 변화량 계산
+    delta_lat = crop_radius_km / KM_PER_DEG_LAT
+    delta_lon = crop_radius_km / KM_PER_DEG_LON_AT_35  # 근사치 사용
+
+    # 자를 영역의 위경도 바운딩 박스 (Bounding Box)
+    crop_min_lon = center_lon - delta_lon
+    crop_max_lon = center_lon + delta_lon
+    crop_min_lat = center_lat - delta_lat
+    crop_max_lat = center_lat + delta_lat
+    bounds = {
+        "min_lon": float(crop_min_lon),
+        "max_lon": float(crop_max_lon),
+        "min_lat": float(crop_min_lat),
+        "max_lat": float(crop_max_lat)
+    }
+
+    # 3. 
위경도 좌표를 픽셀 좌표로 변환 (선형 매핑 가정) + + # 픽셀 좌표 x: min_lon -> 0, max_lon -> width + # 픽셀 좌표 y: max_lat -> 0, min_lat -> height (GIS 이미지는 보통 북쪽(위도 최대)이 0에 해당) + + def lon_to_pixel_x(lon): + return int(width * (lon - min_lon) / deg_lon_span) + + def lat_to_pixel_y(lat): + # Y축은 위도에 반비례 (큰 위도가 작은 Y 픽셀) + return int(height * (max_lat - lat) / deg_lat_span) + + # 자를 영역의 픽셀 좌표 계산 + pixel_x_min = max(0, lon_to_pixel_x(crop_min_lon)) + pixel_y_min = max(0, lat_to_pixel_y(crop_max_lat)) # 위도 최대가 y_min (상단) + pixel_x_max = min(width, lon_to_pixel_x(crop_max_lon)) + pixel_y_max = min(height, lat_to_pixel_y(crop_min_lat)) # 위도 최소가 y_max (하단) + + # PIL의 crop 함수는 (left, top, right, bottom) 순서의 픽셀 좌표를 사용 + crop_box = (pixel_x_min, pixel_y_min, pixel_x_max, pixel_y_max) + + # 4. 이미지 자르기 + if pixel_x_min >= pixel_x_max or pixel_y_min >= pixel_y_max: + return "Error: Crop area is outside the image bounds or zero size." + + cropped_img = img.crop(crop_box) + + # 5. Base64 문자열로 인코딩 + buffer = BytesIO() + cropped_img.save(buffer, format="PNG") + base64_encoded_data = base64.b64encode(buffer.getvalue()).decode("utf-8") + + # Base64 문자열 앞에 MIME 타입 정보 추가 + # base64_string = f"data:image/png;base64,{base64_encoded_data}" + + return base64_encoded_data, bounds diff --git a/prediction/opendrift/createJsonResult.py b/prediction/opendrift/createJsonResult.py new file mode 100644 index 0000000..c7c8275 --- /dev/null +++ b/prediction/opendrift/createJsonResult.py @@ -0,0 +1,325 @@ +import xarray as xr +import numpy as np +import pandas as pd +import os +from datetime import datetime, timedelta +from shapely.geometry import Polygon, Point +from convex_hull import get_convex_hull_from_json as get_convex_hull +from createWindJson import extract_wind_data_json as create_wind_json +from calcCostlineLength import OilSpillCoastlineAnalyzer +from extractUvWithinBox import _compute_hydr_region, _extract_uv_at_time +import concurrent.futures + +from config import STORAGE, SIM, COASTLINE, THREAD_POOL +from logger import get_logger +from utils import check_img_file_by_date, find_time_index + +logger = get_logger("createJsonResult") + + +def extract_and_save_data_to_json(ncfile_path, wind_ncfile_path, ocean_ncfile_path, name, ac_lon, ac_lat): + """ + NetCDF 파일에서 시간별 위치, 잔존량, 풍화량, 오염 면적을 추출하여 JSON 파일로 저장합니다. 
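+
+    반환값은 타임스텝별 딕셔너리의 리스트이며, 각 항목은 time, center_lon/center_lat,
+    remaining_volume_m3, weathered_volume_m3, beached_volume_m3, pollution_area_km2,
+    particles, wkt, viscosity, thickness, temperature, wind_data, hydr_data, hydr_grid,
+    pollution_coast_length_m 키를 포함합니다. 파일 누락 등 오류 시 None을 반환합니다.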
+ """ + logger.info(f"Processing: {ncfile_path}, {wind_ncfile_path}, {ocean_ncfile_path}") + try: + if not os.path.exists(ncfile_path): + raise FileNotFoundError(f"Simulation result file not found: {ncfile_path}") + if not os.path.exists(wind_ncfile_path): + raise FileNotFoundError(f"Wind data file not found: {wind_ncfile_path}") + if not os.path.exists(ocean_ncfile_path): + raise FileNotFoundError(f"Ocean data file not found: {ocean_ncfile_path}") + + analyzer = OilSpillCoastlineAnalyzer( + str(STORAGE.COASTLINE), + buffer_distance=COASTLINE.BUFFER_DISTANCE, + simplify_tolerance=COASTLINE.SIMPLIFY_TOLERANCE, + center_point=(ac_lon, ac_lat), + radius=COASTLINE.DEFAULT_RADIUS + ) + + with xr.open_dataset(ncfile_path) as ds, \ + xr.open_dataset(wind_ncfile_path) as wind_ds, \ + xr.open_dataset(ocean_ncfile_path) as ocean_ds: + + # ------------------------------------------------------------------ + # ① 시뮬레이션 변수 일괄 로드 — xarray → NumPy 1회 변환 + # ------------------------------------------------------------------ + total_steps = len(ds.time) + + # OpenDrift NetCDF 차원 순서: (trajectory, time) = (N, T) + # .T 전치로 (T, N)으로 통일하여 이후 모든 연산이 axis=0=시간, axis=1=입자 기준이 되도록 함 + status_all = ds.status.values.T if 'status' in ds else None # (T, N) + lon_all = ds.lon.values.T if 'lon' in ds else None + lat_all = ds.lat.values.T if 'lat' in ds else None + mass_all = ds.mass_oil.values.T if 'mass_oil' in ds else None + density_all = ds.density.values.T if 'density' in ds else None + evap_all = ds.mass_evaporated.values.T if 'mass_evaporated' in ds else None + moving_all = ds.moving.values.T if 'moving' in ds else None + viscosity_all = ds.viscosity.values.T if 'viscosity' in ds else None + watertemp_all = ds.sea_water_temperature.values.T if 'sea_water_temperature' in ds else None + oil_film_thick = (float(ds.oil_film_thickness.isel(time=0).values[0]) + if 'oil_film_thickness' in ds else None) + + initial_mass_total = None + if mass_all is not None: + try: + initial_mass_total = float(mass_all[0].sum()) + logger.info(f"Initial oil mass: {initial_mass_total:.2f} kg") + except Exception as e: + logger.warning(f"Error processing mass_oil: {e}") + + # ------------------------------------------------------------------ + # ② 타임스탬프 사전 계산 + # ------------------------------------------------------------------ + time_values_pd = pd.to_datetime(ds.time.values) + formatted_times = [t.strftime('%Y-%m-%d %H:%M:%S') for t in time_values_pd] + + # ------------------------------------------------------------------ + # ③ 전처리 루프 벡터화 (기존 lines 77–175 대체) + # ------------------------------------------------------------------ + cumulative_evaporated_arr = np.zeros(total_steps) + cumulative_beached_arr = np.zeros(total_steps) + cumulative_pollution_area = {} + + first_strand_step = None + last_valid_lon = None + last_valid_lat = None + + if status_all is not None and lon_all is not None: + N = status_all.shape[1] + + # 입자별 최초 stranded 스텝 + stranded_mask = (status_all == 1) # (T, N) + has_stranded = stranded_mask.any(axis=0) # (N,) + first_strand_step = np.where(has_stranded, + stranded_mask.argmax(axis=0), + -1).astype(np.int32) # (N,) + + # 입자별 마지막 유효 위치 + valid_lon_mask = ~np.isnan(lon_all) # (T, N) + has_any_valid = valid_lon_mask.any(axis=0) # (N,) + last_valid_t = (total_steps - 1 + - np.flip(valid_lon_mask, axis=0).argmax(axis=0)) # (N,) + pix = np.arange(N) + last_valid_lon = np.where(has_any_valid, lon_all[last_valid_t, pix], np.nan) + last_valid_lat = np.where(has_any_valid, lat_all[last_valid_t, pix], np.nan) + + # 누적 증발량 — 
expanding max per particle + if evap_all is not None: + evap_clean = np.where(np.isnan(evap_all), -np.inf, evap_all) + run_evap = np.maximum.accumulate(evap_clean, axis=0) # (T, N) + run_evap = np.clip(run_evap, 0, None) + cumulative_evaporated_arr = run_evap.sum(axis=1) # (T,) + + # 누적 부착량 — expanding max per stranded particle + if mass_all is not None and density_all is not None: + safe_den = np.where((density_all > 0) & ~np.isnan(density_all), + density_all, np.inf) + beach_vol = np.where( + stranded_mask & ~np.isnan(mass_all) & (mass_all > 0), + mass_all / safe_den, 0.0) # (T, N) + run_beach = np.maximum.accumulate(beach_vol, axis=0) # (T, N) + + strand_threshold = np.where(first_strand_step >= 0, + first_strand_step, + total_steps) + active_matrix = (np.arange(total_steps)[:, None] + >= strand_threshold[None, :]) # (T, N) + cumulative_beached_arr = (run_beach * active_matrix).sum(axis=1) # (T,) + + # 누적 오염 면적 — 순차 루프 유지 (set union 의존), 내부 연산은 vectorized + all_polluted_cells = set() + grid_config = None + + for i in range(total_steps): + if mass_all is not None: + lon_i = lon_all[i] + lat_i = lat_all[i] + mass_i = mass_all[i] + valid_mask = (~np.isnan(lon_i)) & (~np.isnan(lat_i)) & (mass_i > 0) + + if np.any(valid_mask): + lon_active = lon_i[valid_mask] + lat_active = lat_i[valid_mask] + + if grid_config is None: + grid_config = { + 'min_lon': lon_active.min() - 0.01, + 'max_lon': lon_active.max() + 0.01, + 'min_lat': lat_active.min() - 0.01, + 'max_lat': lat_active.max() + 0.01, + 'num_lon_bins': SIM.POLLUTION_GRID_BINS, + 'num_lat_bins': SIM.POLLUTION_GRID_BINS, + } + else: + grid_config['min_lon'] = min(grid_config['min_lon'], lon_active.min() - 0.01) + grid_config['max_lon'] = max(grid_config['max_lon'], lon_active.max() + 0.01) + grid_config['min_lat'] = min(grid_config['min_lat'], lat_active.min() - 0.01) + grid_config['max_lat'] = max(grid_config['max_lat'], lat_active.max() + 0.01) + + lon_bins = np.linspace(grid_config['min_lon'], grid_config['max_lon'], + grid_config['num_lon_bins'] + 1) + lat_bins = np.linspace(grid_config['min_lat'], grid_config['max_lat'], + grid_config['num_lat_bins'] + 1) + + lon_indices = np.digitize(lon_active, lon_bins) - 1 + lat_indices = np.digitize(lat_active, lat_bins) - 1 + + for lon_idx, lat_idx in zip(lon_indices, lat_indices): + if (0 <= lon_idx < grid_config['num_lon_bins'] + and 0 <= lat_idx < grid_config['num_lat_bins']): + all_polluted_cells.add((lon_idx, lat_idx)) + + delta_lon_km = ((grid_config['max_lon'] - grid_config['min_lon']) + * SIM.KM_PER_DEG_LON) + delta_lat_km = ((grid_config['max_lat'] - grid_config['min_lat']) + * SIM.KM_PER_DEG_LAT) + area_of_cell_km2 = ((delta_lon_km / grid_config['num_lon_bins']) + * (delta_lat_km / grid_config['num_lat_bins'])) + + cumulative_pollution_area[i] = len(all_polluted_cells) * area_of_cell_km2 + else: + cumulative_pollution_area[i] = cumulative_pollution_area.get(i - 1, 0.0) if i > 0 else 0.0 + else: + cumulative_pollution_area[i] = cumulative_pollution_area.get(i - 1, 0.0) if i > 0 else 0.0 + + # ------------------------------------------------------------------ + # ④ wind/hydr 전체 타임스텝 사전 추출 (파일 재오픈 없음) + # ------------------------------------------------------------------ + logger.info("Pre-extracting hydr data for all timesteps...") + hydr_region = _compute_hydr_region(ocean_ds, ac_lon, ac_lat) + hydr_time_indices = [find_time_index(ocean_ds, t)[0] for t in formatted_times] + hydr_cache = [_extract_uv_at_time(ocean_ds, idx, hydr_region) + for idx in hydr_time_indices] + + 
logger.info("Pre-extracting wind data for all timesteps...") + wind_cache = [create_wind_json(wind_ds, time_values_pd[i], ac_lon, ac_lat) + for i in range(total_steps)] + + # ------------------------------------------------------------------ + # ⑤ process_time_step — pre-loaded 배열 사용, 중복 xarray 호출 없음 + # ------------------------------------------------------------------ + def process_time_step(i): + formatted_time = formatted_times[i] + logger.debug(f"Processing time step: {formatted_time}") + + lon_t = lon_all[i] if lon_all is not None else None + lat_t = lat_all[i] if lat_all is not None else None + mass_t = mass_all[i] if mass_all is not None else None + density_t = density_all[i] if density_all is not None else None + status_t = status_all[i] if status_all is not None else None + moving_t = moving_all[i] if moving_all is not None else None + viscosity_t = viscosity_all[i] if viscosity_all is not None else None + watertemp_t = watertemp_all[i] if watertemp_all is not None else None + + # 활성 입자 처리 + active_mask = moving_t > 0 if moving_t is not None else None + active_lon = lon_t[active_mask] if (active_mask is not None and lon_t is not None) else [] + active_lat = lat_t[active_mask] if (active_mask is not None and lat_t is not None) else [] + + center_lon, center_lat = None, None + if len(active_lon) > 0 and len(active_lat) > 0: + center_lon = float(np.mean(active_lon)) + center_lat = float(np.mean(active_lat)) + + # 입자 목록 구성 (stranded 처리 + last valid position 사용) + particles = [] + if lon_t is not None and lat_t is not None and first_strand_step is not None: + stranded_flags = (first_strand_step >= 0) & (i >= first_strand_step) # (N,) + + for idx in range(len(lon_t)): + lon_val = lon_t[idx] + lat_val = lat_t[idx] + stranded_value = int(stranded_flags[idx]) + + if stranded_value == 1 and (np.isnan(lon_val) or np.isnan(lat_val)): + if last_valid_lon is not None and not np.isnan(last_valid_lon[idx]): + lon_val = last_valid_lon[idx] + lat_val = last_valid_lat[idx] + + if np.isnan(lon_val) or np.isnan(lat_val): + continue + + particles.append({ + "lon": float(lon_val), + "lat": float(lat_val), + "stranded": stranded_value, + }) + + # 오염 해안 길이 + length, info = analyzer.calculate_polluted_length(particles) + + # Convex hull + try: + hull_coords = get_convex_hull(particles) + wkt = Point(hull_coords[0]).wkt if len(hull_coords) == 1 else Polygon(hull_coords).wkt + except ValueError as e: + logger.warning(f"Convex hull error: {e}") + wkt = "" + + # 잔존량 (status == 0 해상 입자) + remaining_volume_m3 = 0.0 + if mass_t is not None and density_t is not None and status_t is not None: + sea_mask = (status_t == 0) + sea_masses = mass_t[sea_mask] + sea_densities = density_t[sea_mask] + valid_sea = ~np.isnan(sea_masses) & (sea_masses > 0) & ~np.isnan(sea_densities) & (sea_densities > 0) + if np.any(valid_sea): + avg_density = np.mean(sea_densities[valid_sea]) + if avg_density > 0: + remaining_volume_m3 = float(np.sum(sea_masses[valid_sea]) / avg_density) + + # 누적 증발량 → 부피 + evaporated_mass = float(cumulative_evaporated_arr[i]) + weathered_volume_m3 = 0.0 + if evaporated_mass > 0 and density_t is not None: + valid_den = ~np.isnan(density_t) & (density_t > 0) + if np.any(valid_den): + avg_density = float(np.mean(density_t[valid_den])) + if avg_density > 0: + weathered_volume_m3 = evaporated_mass / avg_density + + beached_volume_m3 = float(cumulative_beached_arr[i]) + pollution_area = cumulative_pollution_area.get(i, 0.0) + + average_viscosity = float(np.nanmean(viscosity_t)) if (viscosity_t is not None and 
len(viscosity_t) > 0) else None + average_water_temp = float(np.nanmean(watertemp_t)) if (watertemp_t is not None and len(watertemp_t) > 0) else None + + hydr_data = hydr_cache[i] + + return { + "time": formatted_time, + "center_lon": float(center_lon) if center_lon is not None else None, + "center_lat": float(center_lat) if center_lat is not None else None, + "remaining_volume_m3": float(remaining_volume_m3), + "weathered_volume_m3": float(weathered_volume_m3), + "pollution_area_km2": float(pollution_area), + "beached_volume_m3": float(beached_volume_m3), + "particles": particles, + "wkt": wkt, + "viscosity": average_viscosity, + "thickness": oil_film_thick, + "temperature": average_water_temp, + "wind_data": wind_cache[i], + "hydr_data": hydr_data['value'], + "hydr_grid": hydr_data['grid'], + "pollution_coast_length_m": length, + } + + # ThreadPoolExecutor — wind/hydr I/O 없으므로 CPU 위주 작업만 + with concurrent.futures.ThreadPoolExecutor(max_workers=THREAD_POOL.MAX_WORKERS) as executor: + futures = {executor.submit(process_time_step, i): i for i in range(total_steps)} + results = [None] * total_steps + for future in concurrent.futures.as_completed(futures): + idx = futures[future] + results[idx] = future.result() + + return results + + except FileNotFoundError as e: + logger.error(f"File not found: {e}") + return None + except Exception as e: + logger.exception(f"An unexpected error occurred: {e}") + return None diff --git a/prediction/opendrift/createWindJson.py b/prediction/opendrift/createWindJson.py new file mode 100644 index 0000000..4c138f8 --- /dev/null +++ b/prediction/opendrift/createWindJson.py @@ -0,0 +1,95 @@ +import xarray as xr +import numpy as np +from datetime import datetime +import pandas as pd +from typing import Union + +from config import WIND_JSON, SIM +from logger import get_logger + +logger = get_logger("createWindJson") + + +def extract_wind_data_json(wind_ds_or_path: Union[str, xr.Dataset], + target_time, center_lon: float, center_lat: float) -> list: + """ + NetCDF 파일 또는 이미 열린 Dataset에서 특정 시간의 바람 데이터를 추출하고 JSON 형식으로 반환 + + Parameters + ---------- + wind_ds_or_path : str or xr.Dataset + NetCDF 파일 경로(str) 또는 이미 열린 xr.Dataset 객체. + Dataset 객체가 전달되면 내부에서 닫지 않습니다. 
+ target_time : str or datetime or pd.Timestamp + 추출할 시간 (예: '2024-01-15 12:00:00' 또는 datetime 객체) + center_lon : float + 중심 경도 + center_lat : float + 중심 위도 + + Returns + ------- + list + 위도, 경도, 풍향, 풍속 정보가 포함된 리스트 + """ + range_km = WIND_JSON.RANGE_KM + grid_spacing_km = WIND_JSON.GRID_SPACING_KM + + own_ds = isinstance(wind_ds_or_path, str) + if own_ds: + ds = xr.open_dataset(wind_ds_or_path) + else: + ds = wind_ds_or_path + + try: + ds_time = ds.sel(time=target_time, method='nearest') + + lat_per_km = 1 / SIM.KM_PER_DEG_LAT + lon_per_km = 1 / (SIM.KM_PER_DEG_LAT * np.cos(np.radians(center_lat))) + + lat_range = range_km * lat_per_km + lon_range = range_km * lon_per_km + + lat_min = center_lat - lat_range + lat_max = center_lat + lat_range + lon_min = center_lon - lon_range + lon_max = center_lon + lon_range + + ds_subset = ds_time.sel( + lat=slice(lat_min, lat_max), + lon=slice(lon_min, lon_max) + ) + + n_points = int(2 * range_km / grid_spacing_km) + 1 + new_lats = np.linspace(lat_min, lat_max, n_points) + new_lons = np.linspace(lon_min, lon_max, n_points) + + ds_interp = ds_subset.interp( + lat=new_lats, + lon=new_lons, + method='linear' + ) + + u = ds_interp['x_wind'].values + v = ds_interp['y_wind'].values + + wind_speed = np.sqrt(u**2 + v**2) + wind_direction = (270 - np.arctan2(v, u) * 180 / np.pi) % 360 + + data = [] + for i, lat in enumerate(new_lats): + for j, lon in enumerate(new_lons): + ws = float(wind_speed[i, j]) if not np.isnan(wind_speed[i, j]) else None + wd = float(wind_direction[i, j]) if not np.isnan(wind_direction[i, j]) else None + data.append({ + "lat": round(float(lat), 6), + "lon": round(float(lon), 6), + "wind_speed": round(ws, 2) if ws is not None else None, + "wind_direction": round(wd, 1) if wd is not None else None, + }) + + return data + + finally: + if own_ds: + ds.close() diff --git a/prediction/opendrift/dockerfile b/prediction/opendrift/dockerfile new file mode 100644 index 0000000..ec4c377 --- /dev/null +++ b/prediction/opendrift/dockerfile @@ -0,0 +1,21 @@ +FROM opendrift/opendrift:latest + +WORKDIR /app + +# gunicorn 설치 추가 +RUN pip install fastapi uvicorn gunicorn +# 결과 데이터 저장 폴더 추가 +RUN mkdir -p /app/result + +COPY . . + +EXPOSE 5003 + +# gunicorn으로 실행 +CMD ["gunicorn", "api:app", "-w", "4", "-k", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:5003"] + +## 빌드 명령어 +# docker build -t opendrift-api . 
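+
+## 참고: 컨테이너 내부 데이터 경로는 STORAGE_BASE 환경변수로 주입한다 (config.py 기본값: C:/upload)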
+ +## 실행 명령어 (로컬 데이터 폴더를 컨테이너의 /storage로 마운트) +# docker run -d -p 5003:5003 -v /devdata/services/prediction/data:/storage -e STORAGE_BASE=/storage --name opendrift-api opendrift-api diff --git a/prediction/opendrift/extractUvFull.py b/prediction/opendrift/extractUvFull.py new file mode 100644 index 0000000..4a39962 --- /dev/null +++ b/prediction/opendrift/extractUvFull.py @@ -0,0 +1,120 @@ +import numpy as np +import xarray as xr +from datetime import datetime +import pandas as pd + +from logger import get_logger +from utils import find_time_index, convert_and_round + +logger = get_logger("extractUvFull") + + +def extract_uv_full(nc_file, target_time, category, skip=5, lon_range=None, lat_range=None): + """ + NetCDF 파일 전체에서 선택한 시간의 u, v 데이터 추출 (일정 간격으로 샘플링) + """ + ds = xr.open_dataset(nc_file) + + time_idx, selected_time = find_time_index(ds, target_time) + + lon = ds['lon'].values + lat = ds['lat'].values + + if lon.ndim == 1 and lat.ndim == 1: + lon_2d, lat_2d = np.meshgrid(lon, lat) + else: + lon_2d = lon + lat_2d = lat + + if category == "wind": + u_data = ds['x_wind'].values + v_data = ds['y_wind'].values + else: + u_data = ds['ssu'].values + v_data = ds['ssv'].values + + if u_data.ndim == 3: + u_full = u_data[time_idx] + v_full = v_data[time_idx] + elif u_data.ndim == 4: + u_full = u_data[time_idx, 0] + v_full = v_data[time_idx, 0] + else: + u_full = u_data + v_full = v_data + + if lon_range is not None or lat_range is not None: + mask = np.ones(lon_2d.shape, dtype=bool) + + if lon_range is not None: + min_lon, max_lon = lon_range + mask = mask & (lon_2d >= min_lon) & (lon_2d <= max_lon) + + if lat_range is not None: + min_lat, max_lat = lat_range + mask = mask & (lat_2d >= min_lat) & (lat_2d <= max_lat) + + rows, cols = np.where(mask) + if len(rows) == 0: + raise ValueError("No data within specified range") + + min_row, max_row = rows.min(), rows.max() + min_col, max_col = cols.min(), cols.max() + + u_full = u_full[min_row:max_row+1, min_col:max_col+1] + v_full = v_full[min_row:max_row+1, min_col:max_col+1] + lon_2d = lon_2d[min_row:max_row+1, min_col:max_col+1] + lat_2d = lat_2d[min_row:max_row+1, min_col:max_col+1] + + u_region = u_full[::skip, ::skip] + v_region = v_full[::skip, ::skip] + lon_region = lon_2d[::skip, ::skip] + lat_region = lat_2d[::skip, ::skip] + + logger.debug(f"Original size: {u_full.shape}") + logger.debug(f"Sampled size: {u_region.shape}") + + land_mask = (u_region == 0) & (v_region == 0) + + u_list = convert_and_round(u_region, land_mask) + v_list = convert_and_round(v_region, land_mask) + + lon_intervals = [] + for i in range(lon_region.shape[1] - 1): + interval = lon_region[0, i+1] - lon_region[0, i] + lon_intervals.append(float(interval)) + + lat_intervals = [] + for i in range(lat_region.shape[0] - 1): + interval = lat_region[i+1, 0] - lat_region[i, 0] + lat_intervals.append(float(interval)) + + bound_lon_lat = { + "top": float(lat_region.max()), + "bottom": float(lat_region.min()), + "left": float(lon_region.min()), + "right": float(lon_region.max()) + } + + model_fcst_dt = selected_time.strftime("%Y%m%d%H") + + result = { + "data": { + "modelFcstDt": model_fcst_dt, + "values": [ + u_list, + v_list + ] + }, + "grid": { + "lonInterval": lon_intervals, + "boundLonLat": bound_lon_lat, + "rows": int(u_region.shape[0]), + "cols": int(u_region.shape[1]), + "latInterval": lat_intervals + } + } + + ds.close() + + return result diff --git a/prediction/opendrift/extractUvWithinBox.py b/prediction/opendrift/extractUvWithinBox.py new file mode 100644 index 
0000000..9da133b --- /dev/null +++ b/prediction/opendrift/extractUvWithinBox.py @@ -0,0 +1,164 @@ +import numpy as np +import xarray as xr +from datetime import datetime +import pandas as pd + +from logger import get_logger +from utils import haversine_distance, find_time_index, convert_and_round + +logger = get_logger("extractUvWithinBox") + + +def _compute_hydr_region(ocean_ds: xr.Dataset, center_lon: float, center_lat: float, + box_size_km: float = 25) -> dict: + """ + 해양 데이터셋에서 중심점 기준 공간 영역을 계산합니다. 시뮬레이션당 1회 호출. + + Parameters + ---------- + ocean_ds : xr.Dataset + 이미 열린 해양 NetCDF 데이터셋 + center_lon, center_lat : float + 중심점 경도, 위도 + box_size_km : float + 상하좌우 범위 (km) + + Returns + ------- + dict + min_row, max_row, min_col, max_col, lon_region, lat_region, + lon_intervals, lat_intervals, bound_lon_lat, u_ndim + """ + lon = ocean_ds['lon'].values + lat = ocean_ds['lat'].values + + if lon.ndim == 1 and lat.ndim == 1: + lon_2d, lat_2d = np.meshgrid(lon, lat) + else: + lon_2d = lon + lat_2d = lat + + # 동서/남북 거리 계산 + dx = haversine_distance(center_lon, center_lat, lon_2d, center_lat, return_km=True) + dx = dx * np.sign(lon_2d - center_lon) + dy = haversine_distance(center_lon, center_lat, center_lon, lat_2d, return_km=True) + dy = dy * np.sign(lat_2d - center_lat) + + mask = (np.abs(dx) <= box_size_km) & (np.abs(dy) <= box_size_km) + + rows, cols = np.where(mask) + if len(rows) == 0: + raise ValueError("No data within specified range") + + min_row, max_row = int(rows.min()), int(rows.max()) + min_col, max_col = int(cols.min()), int(cols.max()) + + lon_region = lon_2d[min_row:max_row + 1, min_col:max_col + 1] + lat_region = lat_2d[min_row:max_row + 1, min_col:max_col + 1] + + lon_intervals = [float(lon_region[0, i + 1] - lon_region[0, i]) + for i in range(lon_region.shape[1] - 1)] + lat_intervals = [float(lat_region[i + 1, 0] - lat_region[i, 0]) + for i in range(lat_region.shape[0] - 1)] + + bound_lon_lat = { + "top": float(lat_region.max()), + "bottom": float(lat_region.min()), + "left": float(lon_region.min()), + "right": float(lon_region.max()), + } + + # u 변수 차원 수 미리 파악 + u_data = ocean_ds.get('u', ocean_ds.get('ssu')) + u_ndim = u_data.ndim + + return { + "min_row": min_row, "max_row": max_row, + "min_col": min_col, "max_col": max_col, + "rows": int(max_row - min_row + 1), + "cols": int(max_col - min_col + 1), + "lon_intervals": lon_intervals, + "lat_intervals": lat_intervals, + "bound_lon_lat": bound_lon_lat, + "u_ndim": u_ndim, + } + + +def _extract_uv_at_time(ocean_ds: xr.Dataset, time_idx: int, region: dict) -> dict: + """ + 사전 계산된 공간 영역에서 특정 timestep의 u/v 데이터를 추출합니다. 
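+    공간 영역 계산(_compute_hydr_region)은 시뮬레이션당 1회만 수행하고,
+    이 함수를 타임스텝마다 재호출하는 구조입니다 (createJsonResult.py의 hydr_cache 참조).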
+ + Parameters + ---------- + ocean_ds : xr.Dataset + 이미 열린 해양 NetCDF 데이터셋 + time_idx : int + 시간 인덱스 + region : dict + _compute_hydr_region()이 반환한 영역 정보 + + Returns + ------- + dict + {"value": [u_list, v_list], "grid": {...}} + """ + u_data = ocean_ds.get('u', ocean_ds.get('ssu')) + v_data = ocean_ds.get('v', ocean_ds.get('ssv')) + + u_ndim = region["u_ndim"] + if u_ndim == 3: + u_2d = u_data[time_idx].values + v_2d = v_data[time_idx].values + elif u_ndim == 4: + u_2d = u_data[time_idx, 0].values + v_2d = v_data[time_idx, 0].values + else: + u_2d = u_data.values + v_2d = v_data.values + + r = region + u_region = u_2d[r["min_row"]:r["max_row"] + 1, r["min_col"]:r["max_col"] + 1] + v_region = v_2d[r["min_row"]:r["max_row"] + 1, r["min_col"]:r["max_col"] + 1] + + land_mask = (u_region == 0) & (v_region == 0) + u_list = convert_and_round(u_region, land_mask) + v_list = convert_and_round(v_region, land_mask) + + return { + "value": [u_list, v_list], + "grid": { + "lonInterval": r["lon_intervals"], + "boundLonLat": r["bound_lon_lat"], + "rows": r["rows"], + "cols": r["cols"], + "latInterval": r["lat_intervals"], + }, + } + + +def extract_uv_within_box(nc_file, center_lon, center_lat, target_time, box_size_km=25): + """ + 선택한 포인트와 시간으로부터 상하좌우 정사각형 범위 내의 u, v 데이터 추출 + + Parameters + ---------- + nc_file : str + NetCDF 파일 경로 + center_lon : float + 중심점 경도 + center_lat : float + 중심점 위도 + target_time : str or datetime + 목표 시간 (예: '2024-01-15 12:00:00') + box_size_km : float + 상하좌우 범위 (km), 기본값 25km + + Returns + ------- + result : dict + 추출된 데이터를 담은 딕셔너리 + """ + with xr.open_dataset(nc_file) as ds: + region = _compute_hydr_region(ds, center_lon, center_lat, box_size_km) + time_idx, _ = find_time_index(ds, target_time) + return _extract_uv_at_time(ds, time_idx, region) diff --git a/prediction/opendrift/findFile.py b/prediction/opendrift/findFile.py new file mode 100644 index 0000000..fb535f9 --- /dev/null +++ b/prediction/opendrift/findFile.py @@ -0,0 +1,58 @@ +import os +import glob +from datetime import datetime + +def find_nearest_earlier_file(folder_path, target_time_str): + """ + 주어진 폴더에서 target_time_str (yyyymmddhhmmss)보다 빠르면서 + 가장 가까운 시간의 파일을 찾습니다. + + :param folder_path: 파일을 검색할 폴더 경로 + :param target_time_str: 기준 시간 문자열 (yyyymmddhhmmss 형식) + :return: 찾은 파일의 전체 경로 (없으면 None) + """ + # 1. 대상 시간을 datetime 객체로 변환 + try: + # 파일명 형식 (yyyy-mm-dd hh:mm:ss)과 일치하는 포맷 + target_time = datetime.strptime(target_time_str, '%Y-%m-%d %H:%M:%S') + except ValueError: + return f"오류: 입력된 기준 시간 '{target_time_str}'의 형식이 'yyyymmddhhmmss'와 일치하지 않습니다." + + # 2. 폴더 내 파일 목록 가져오기 및 시간 정보 파싱 + # '??????????????.png' 패턴으로 파일명 필터링 + file_pattern = os.path.join(folder_path, '*.png') + all_files = glob.glob(file_pattern) + + # 시간 차이와 파일 경로를 저장할 리스트 + earlier_files = [] + + for file_path in all_files: + # 파일명에서 확장자를 제외한 부분 (시간 문자열) 추출 + base_name = os.path.basename(file_path) + file_time_str = base_name.split('.')[0] + + # 파일명 길이가 yyyymmddhhmmss (14자리)인지 확인 + if len(file_time_str) == 14: + try: + # 파일 시간을 datetime 객체로 변환 + file_time = datetime.strptime(file_time_str, '%Y%m%d%H%M%S') + + # 3. 기준 시간보다 **이전**인 파일만 필터링 + if file_time <= target_time: + # 기준 시간과의 차이 (양수)를 계산하고 저장 + time_difference = target_time - file_time + earlier_files.append((time_difference, file_path)) + + except ValueError: + # 파일명이 14자리여도 datetime 변환에 실패하면 건너뜀 + continue + + # 4. 
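+
+
+if __name__ == "__main__":
+    # Illustrative smoke test; the file path and coordinates below are
+    # assumptions, not values shipped with this module.
+    result = extract_uv_within_box(
+        "/storage/pos_hydr/20240115/KO108_MOHID_HYDR_SURF_2024011500.nc",
+        center_lon=126.5,
+        center_lat=36.0,
+        target_time="2024-01-15 12:00:00",
+        box_size_km=25,
+    )
+    print(result["grid"]["boundLonLat"])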
diff --git a/prediction/opendrift/findFile.py b/prediction/opendrift/findFile.py
new file mode 100644
index 0000000..fb535f9
--- /dev/null
+++ b/prediction/opendrift/findFile.py
@@ -0,0 +1,58 @@
+import os
+import glob
+from datetime import datetime
+
+def find_nearest_earlier_file(folder_path, target_time_str):
+    """
+    In the given folder, find the file whose timestamp is earlier than
+    (or equal to) target_time_str and closest to it.
+
+    :param folder_path: folder to search
+    :param target_time_str: reference time string ('yyyy-mm-dd hh:mm:ss' format)
+    :return: full path of the matching file (None if there is none)
+    """
+    # 1. Parse the reference time into a datetime object
+    try:
+        target_time = datetime.strptime(target_time_str, '%Y-%m-%d %H:%M:%S')
+    except ValueError:
+        raise ValueError(f"Reference time '{target_time_str}' does not match "
+                         f"the 'yyyy-mm-dd hh:mm:ss' format.")
+
+    # 2. List the folder and parse timestamps from the file names
+    #    (file names are expected to look like yyyymmddhhmmss.png)
+    file_pattern = os.path.join(folder_path, '*.png')
+    all_files = glob.glob(file_pattern)
+
+    # (time difference, file path) pairs
+    earlier_files = []
+
+    for file_path in all_files:
+        # Strip the extension; the base name is the timestamp string
+        base_name = os.path.basename(file_path)
+        file_time_str = base_name.split('.')[0]
+
+        # Only consider names that are exactly 14 digits (yyyymmddhhmmss)
+        if len(file_time_str) == 14:
+            try:
+                file_time = datetime.strptime(file_time_str, '%Y%m%d%H%M%S')
+
+                # 3. Keep only files at or **before** the reference time
+                if file_time <= target_time:
+                    time_difference = target_time - file_time
+                    earlier_files.append((time_difference, file_path))
+
+            except ValueError:
+                # 14 characters but not a valid timestamp: skip
+                continue
+
+    # 4. Pick the file with the smallest (closest) time difference
+    if not earlier_files:
+        return None  # no file earlier than the reference time
+
+    # Sort ascending by time difference; the closest file comes first
+    earlier_files.sort(key=lambda x: x[0])
+
+    # Return the path of the closest file
+    return earlier_files[0][1]
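+
+
+if __name__ == "__main__":
+    # Illustrative usage; the folder path is an assumption.
+    path = find_nearest_earlier_file("/storage/pos_hydr/20240115/visual_image",
+                                     "2024-01-15 12:00:00")
+    print(path)  # e.g. '.../20240115090000.png', or None if nothing is earlier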
diff --git a/prediction/opendrift/latestForecastDate.py b/prediction/opendrift/latestForecastDate.py
new file mode 100644
index 0000000..c12a2b9
--- /dev/null
+++ b/prediction/opendrift/latestForecastDate.py
@@ -0,0 +1,203 @@
+import os
+import xarray as xr
+import pandas as pd
+import numpy as np
+from datetime import datetime, timedelta
+from typing import Optional, List
+
+from config import STORAGE
+from logger import get_logger
+
+logger = get_logger("latestForecastDate")
+
+
+def check_file_size(file_path: str, min_size_bytes: int = 1024) -> bool:
+    """Check that a file exists and meets the minimum size."""
+    try:
+        if not os.path.exists(file_path):
+            return False
+
+        file_size = os.path.getsize(file_path)
+        if file_size < min_size_bytes:
+            logger.debug(f"File size insufficient: {os.path.basename(file_path)} ({file_size} bytes)")
+            return False
+
+        return True
+    except Exception as e:
+        logger.debug(f"File check error: {os.path.basename(file_path)} - {e}")
+        return False
+
+
+def check_folder_completeness(folder_path: str,
+                              required_patterns: Optional[List[str]] = None,
+                              min_file_size: int = 1024) -> bool:
+    """Check whether a folder's download is complete."""
+    try:
+        all_files = os.listdir(folder_path)
+        if required_patterns:
+            for pattern in required_patterns:
+                matching_files = [f for f in all_files if f.startswith(pattern)]
+                if not matching_files:
+                    return False
+                for matched_file in matching_files:
+                    file_path = os.path.join(folder_path, matched_file)
+                    if not check_file_size(file_path, min_file_size):
+                        return False
+            return True
+
+        nc_files = [f for f in all_files if f.endswith('.nc')]
+
+        if not nc_files:
+            logger.debug("No NC files found")
+            return False
+
+        invalid_count = 0
+        for nc_file in nc_files:
+            file_path = os.path.join(folder_path, nc_file)
+            if not check_file_size(file_path, min_file_size):
+                invalid_count += 1
+
+        if invalid_count > 0:
+            logger.debug(f"{invalid_count} files with insufficient size")
+            return False
+
+        return True
+    except Exception as e:
+        logger.debug(f"Validation error: {e}")
+        return False
+
+
+def get_latest_forecast_date(base_path: str,
+                             max_folders_to_check: int = 7,
+                             required_patterns: Optional[List[str]] = None,
+                             min_file_size: int = 1024) -> Optional[str]:
+    """
+    Among the N most recent YYYYMMDD folders, return the name of the newest
+    folder whose contents are complete.
+
+    Args:
+        base_path: directory where forecast files are stored
+        max_folders_to_check: number of recent folders to inspect
+        required_patterns: required file-name prefixes
+        min_file_size: minimum file size (bytes)
+
+    Returns:
+        Folder name in YYYYMMDD format, or None
+    """
+    if not os.path.exists(base_path):
+        logger.warning(f"Path not found: {base_path}")
+        return None
+
+    try:
+        subdirs = [d for d in os.listdir(base_path)
+                   if os.path.isdir(os.path.join(base_path, d))]
+    except PermissionError:
+        logger.warning(f"Permission denied: {base_path}")
+        return None
+
+    valid_folders = []
+    for subdir in subdirs:
+        if len(subdir) == 8 and subdir.isdigit():
+            try:
+                datetime.strptime(subdir, '%Y%m%d')
+                folder_path = os.path.join(base_path, subdir)
+                creation_time = os.path.getctime(folder_path)
+                valid_folders.append((subdir, creation_time, folder_path))
+            except (ValueError, OSError):
+                continue
+
+    if not valid_folders:
+        logger.warning(f"No valid date folders: {base_path}")
+        return None
+
+    # Take the N most recent folders by name, then check them newest-first
+    # by creation time
+    valid_folders.sort(key=lambda x: x[0], reverse=True)
+    folders_to_check = valid_folders[:max_folders_to_check]
+
+    folders_to_check.sort(key=lambda x: x[1], reverse=True)
+
+    for folder_name, creation_timestamp, folder_path in folders_to_check:
+        is_complete = check_folder_completeness(
+            folder_path,
+            required_patterns=required_patterns,
+            min_file_size=min_file_size
+        )
+
+        if is_complete:
+            return folder_name
+
+    logger.warning(f"No complete folder: {base_path}")
+    return None
+
+
+def get_earliest_latest_forecast_date(wind_path: str = None,
+                                      hydr_path: str = None,
+                                      max_folders_to_check: int = 7,
+                                      required_patterns: Optional[List[str]] = None,
+                                      min_file_size: int = 1024) -> Optional[dict]:
+    """
+    Of the wind and ocean forecasts' newest complete folders, take the
+    earlier one and return summary information about it.
+
+    Args:
+        wind_path: wind forecast directory
+        hydr_path: ocean forecast directory
+        max_folders_to_check: number of recent folders to inspect
+        required_patterns: required file-name prefixes
+        min_file_size: minimum file size (bytes)
+
+    Returns:
+        Forecast info dictionary, or None
+    """
+    if wind_path is None:
+        wind_path = str(STORAGE.POS_WIND)
+    if hydr_path is None:
+        hydr_path = str(STORAGE.POS_HYDR)
+    if required_patterns is None:
+        required_patterns = ["EA012", "KO108"]
+
+    wind_latest = get_latest_forecast_date(
+        wind_path,
+        max_folders_to_check=max_folders_to_check,
+        required_patterns=required_patterns,
+        min_file_size=min_file_size
+    )
+
+    hydr_latest = get_latest_forecast_date(
+        hydr_path,
+        max_folders_to_check=max_folders_to_check,
+        required_patterns=required_patterns,
+        min_file_size=min_file_size
+    )
+
+    if wind_latest is None or hydr_latest is None:
+        logger.warning(f"Warning: No forecast received in the last {max_folders_to_check} days. Contact administrator.")
+        return None
+
+    latest_folder_name = min(wind_latest, hydr_latest)
+    latest_folder_name_formatted = datetime.strptime(latest_folder_name, "%Y%m%d").strftime("%Y-%m-%d")
+    latest_receive_date = (datetime.strptime(latest_folder_name, "%Y%m%d") + timedelta(days=1)).strftime("%Y-%m-%d")
+
+    hydr_file_path = os.path.join(hydr_path, hydr_latest, f"KO108_MOHID_HYDR_SURF_{hydr_latest}00.nc")
+    with xr.open_dataset(hydr_file_path) as ds:
+        start_date = ds['time'].values[0]
+        end_date = ds['time'].values[-1]
+        diff = end_date - start_date
+        hour_diff = diff / np.timedelta64(1, 'h')
+        hour_diff_string = f"{int(hour_diff)}h"
+
+    return_json = {
+        "date": latest_folder_name_formatted,
+        "receivedDate": latest_receive_date + " 12:00",
+        "startDate": pd.Timestamp(start_date).strftime("%Y-%m-%d %H:%M:%S"),
+        "endDate": pd.Timestamp(end_date).strftime("%Y-%m-%d %H:%M:%S"),
+        "diff": hour_diff_string
+    }
+
+    return return_json
+
+
+if __name__ == "__main__":
+    result = get_earliest_latest_forecast_date()
+
+    if result:
+        logger.info(f"Result: {result}")
+    else:
+        logger.info("No complete forecast folder found")
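+
+# For reference, a successful call returns a dict shaped like this
+# (the values are made-up examples, not real output):
+# {
+#     "date": "2024-01-15",
+#     "receivedDate": "2024-01-16 12:00",
+#     "startDate": "2024-01-15 00:00:00",
+#     "endDate": "2024-01-18 00:00:00",
+#     "diff": "72h"
+# }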
diff --git a/prediction/opendrift/logger.py b/prediction/opendrift/logger.py
new file mode 100644
index 0000000..122b314
--- /dev/null
+++ b/prediction/opendrift/logger.py
@@ -0,0 +1,68 @@
+"""
+logger.py
+
+Logging configuration module.
+Uses the logging module instead of print() for consistent log output.
+"""
+
+import logging
+import sys
+from typing import Optional
+
+
+def setup_logger(name: str = "opendrift", level: int = logging.INFO,
+                 log_format: Optional[str] = None) -> logging.Logger:
+    """
+    Configure and return a logger.
+
+    Parameters
+    ----------
+    name : str
+        Logger name (default: "opendrift")
+    level : int
+        Log level (default: logging.INFO)
+    log_format : str, optional
+        Log format (default: the standard format below)
+
+    Returns
+    -------
+    logging.Logger
+        The configured logger instance
+    """
+    logger = logging.getLogger(name)
+
+    if not logger.handlers:
+        logger.setLevel(level)
+
+        if log_format is None:
+            log_format = '[%(asctime)s] %(levelname)s - %(message)s'
+
+        handler = logging.StreamHandler(sys.stdout)
+        handler.setLevel(level)
+        handler.setFormatter(logging.Formatter(log_format, datefmt='%Y-%m-%d %H:%M:%S'))
+        logger.addHandler(handler)
+
+    return logger
+
+
+# Default logger instance
+logger = setup_logger()
+
+
+def get_logger(name: str = None) -> logging.Logger:
+    """
+    Get a per-module logger. Named loggers are children of "opendrift" and
+    rely on propagation to its handler, which is set up at import time.
+
+    Parameters
+    ----------
+    name : str, optional
+        Logger name; if None, the default logger is returned
+
+    Returns
+    -------
+    logging.Logger
+        The logger instance
+    """
+    if name is None:
+        return logger
+    return logging.getLogger(f"opendrift.{name}")
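+
+
+if __name__ == "__main__":
+    # Quick illustration of the intended usage pattern: module loggers are
+    # children of "opendrift" and emit through its stdout handler.
+    log = get_logger("example")
+    log.info("hello from opendrift.example")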
+echo "새로운 PID: $NEW_PID" +echo "로그 확인: tail -f $LOG_FILE" +echo "종료 명령: ./stop_server.sh" +echo "=====================================" + diff --git a/prediction/opendrift/utils.py b/prediction/opendrift/utils.py new file mode 100644 index 0000000..8d7faa8 --- /dev/null +++ b/prediction/opendrift/utils.py @@ -0,0 +1,267 @@ +""" +utils.py + +공통 유틸리티 함수 모듈 +여러 모듈에서 중복 사용되는 함수들을 통합합니다. +""" + +import os +import numpy as np +import pandas as pd +from datetime import datetime, timedelta +from typing import Optional, Tuple, List + +from config import STORAGE, SIM, FilePatterns + + +def haversine_distance(lon1: float, lat1: float, lon2: float, lat2: float, + return_km: bool = True) -> float: + """ + 두 지점 간의 Haversine 거리를 계산합니다. + + Parameters + ---------- + lon1, lat1 : float + 첫 번째 지점의 경도, 위도 + lon2, lat2 : float + 두 번째 지점의 경도, 위도 + return_km : bool + True이면 km 단위, False이면 m 단위로 반환 + + Returns + ------- + float + 두 지점 간의 거리 + """ + lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2]) + + dlon = lon2 - lon1 + dlat = lat2 - lat1 + a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2 + c = 2 * np.arcsin(np.sqrt(a)) + + if return_km: + return c * SIM.EARTH_RADIUS_KM + else: + return c * SIM.EARTH_RADIUS_M + + +def find_time_index(ds, target_time) -> Tuple[int, datetime]: + """ + 입력된 시간과 동일하거나 과거이면서 가장 가까운 시간의 인덱스를 찾습니다. + + Parameters + ---------- + ds : xarray.Dataset + NetCDF 데이터셋 + target_time : str or datetime + 목표 시간 (예: '2024-01-15 12:00:00' 또는 datetime 객체) + + Returns + ------- + time_idx : int + 선택된 시간의 인덱스 + selected_time : datetime + 선택된 시간 + + Raises + ------ + ValueError + 목표 시간 이전의 데이터가 없는 경우 + """ + if isinstance(target_time, str): + target_time = pd.to_datetime(target_time) + + time_var = ds['time'].values + times = pd.to_datetime(time_var) + + valid_times = times[times <= target_time] + + if len(valid_times) == 0: + raise ValueError(f"목표 시간 {target_time} 이전의 데이터가 없습니다. " + f"데이터의 시작 시간: {times[0]}") + + selected_time = valid_times.max() + time_idx = np.where(times == selected_time)[0][0] + + return time_idx, selected_time + + +def convert_and_round(arr: np.ndarray, land_mask: np.ndarray, + land_value: int = 0, decimals: int = 3) -> List[List]: + """ + 2D numpy 배열을 리스트로 변환하면서 반올림 및 육지 마스킹을 처리합니다. + + Parameters + ---------- + arr : np.ndarray + 변환할 2D 배열 + land_mask : np.ndarray + 육지 마스크 (True인 위치는 육지) + land_value : int + 육지 값 (기본값: 0) + decimals : int + 반올림 소수점 자릿수 (기본값: 3) + + Returns + ------- + List[List] + 변환된 2D 리스트 + """ + result = np.where( + land_mask | np.isnan(arr), + float(land_value), + np.round(arr.astype(float), decimals) + ) + return result.tolist() + + +def check_nc_file_by_date(base_path: str, date_obj: datetime, + max_attempts: int = None) -> Tuple[Optional[str], Optional[datetime]]: + """ + 주어진 날짜로 NC 파일이 존재하는지 확인하고, 없으면 이전 날짜로 재시도합니다. 
diff --git a/prediction/opendrift/utils.py b/prediction/opendrift/utils.py
new file mode 100644
index 0000000..8d7faa8
--- /dev/null
+++ b/prediction/opendrift/utils.py
@@ -0,0 +1,267 @@
+"""
+utils.py
+
+Shared utility module.
+Consolidates functions that were duplicated across several modules.
+"""
+
+import os
+import numpy as np
+import pandas as pd
+from datetime import datetime, timedelta
+from typing import Optional, Tuple, List
+
+from config import STORAGE, SIM, FilePatterns
+
+
+def haversine_distance(lon1: float, lat1: float, lon2: float, lat2: float,
+                       return_km: bool = True) -> float:
+    """
+    Compute the Haversine distance between two points. Scalars or numpy
+    arrays may be passed; arrays are broadcast elementwise.
+
+    Parameters
+    ----------
+    lon1, lat1 : float
+        Longitude and latitude of the first point
+    lon2, lat2 : float
+        Longitude and latitude of the second point
+    return_km : bool
+        If True return kilometres, otherwise metres
+
+    Returns
+    -------
+    float
+        Distance between the two points
+    """
+    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])
+
+    dlon = lon2 - lon1
+    dlat = lat2 - lat1
+    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
+    c = 2 * np.arcsin(np.sqrt(a))
+
+    if return_km:
+        return c * SIM.EARTH_RADIUS_KM
+    else:
+        return c * SIM.EARTH_RADIUS_M
+
+
+def find_time_index(ds, target_time) -> Tuple[int, datetime]:
+    """
+    Find the index of the time that is closest to, and not later than,
+    the given target time.
+
+    Parameters
+    ----------
+    ds : xarray.Dataset
+        NetCDF dataset
+    target_time : str or datetime
+        Target time (e.g. '2024-01-15 12:00:00' or a datetime object)
+
+    Returns
+    -------
+    time_idx : int
+        Index of the selected time
+    selected_time : datetime
+        The selected time
+
+    Raises
+    ------
+    ValueError
+        If there is no data at or before the target time
+    """
+    if isinstance(target_time, str):
+        target_time = pd.to_datetime(target_time)
+
+    time_var = ds['time'].values
+    times = pd.to_datetime(time_var)
+
+    valid_times = times[times <= target_time]
+
+    if len(valid_times) == 0:
+        raise ValueError(f"No data at or before target time {target_time}. "
+                         f"Data starts at: {times[0]}")
+
+    selected_time = valid_times.max()
+    time_idx = np.where(times == selected_time)[0][0]
+
+    return time_idx, selected_time
+
+
+def convert_and_round(arr: np.ndarray, land_mask: np.ndarray,
+                      land_value: int = 0, decimals: int = 3) -> List[List]:
+    """
+    Convert a 2D numpy array to a nested list, rounding values and masking
+    land cells.
+
+    Parameters
+    ----------
+    arr : np.ndarray
+        2D array to convert
+    land_mask : np.ndarray
+        Land mask (True marks land cells)
+    land_value : int
+        Value written into land cells (default: 0)
+    decimals : int
+        Number of decimal places to round to (default: 3)
+
+    Returns
+    -------
+    List[List]
+        Converted 2D list
+    """
+    result = np.where(
+        land_mask | np.isnan(arr),
+        float(land_value),
+        np.round(arr.astype(float), decimals)
+    )
+    return result.tolist()
+
+
+def check_nc_file_by_date(base_path: str, date_obj: datetime,
+                          max_attempts: int = None) -> Tuple[Optional[str], Optional[datetime]]:
+    """
+    Check whether an NC file exists for the given date; if not, retry with
+    earlier dates.
+
+    Parameters
+    ----------
+    base_path : str
+        Base storage path
+    date_obj : datetime
+        Starting date
+    max_attempts : int, optional
+        Maximum number of attempts (default: FILE_FALLBACK_DAYS)
+
+    Returns
+    -------
+    file_path : str or None
+        Path of the file found, or None
+    final_date : datetime or None
+        Date actually used, or None
+    """
+    if max_attempts is None:
+        max_attempts = SIM.FILE_FALLBACK_DAYS
+
+    is_wind = "wind" in base_path.lower()
+
+    for _ in range(max_attempts):
+        date_str = date_obj.strftime("%Y%m%d")
+        dir_path = os.path.join(base_path, date_str)
+
+        if not os.path.exists(dir_path):
+            date_obj -= timedelta(days=1)
+            continue
+
+        if is_wind:
+            file_name = FilePatterns.get_wind_filename(date_str)
+        else:
+            file_name = FilePatterns.get_hydr_filename(date_str)
+
+        file_path = os.path.join(dir_path, file_name)
+
+        if os.path.exists(file_path):
+            return file_path, date_obj
+
+        date_obj -= timedelta(days=1)
+
+    return None, None
+
+
+def check_nc_files_for_date(date_obj: datetime,
+                            primary_wind_path: str = None,
+                            fallback_wind_path: str = None,
+                            primary_hydr_path: str = None,
+                            fallback_hydr_path: str = None) -> Tuple[Optional[str], Optional[str], Optional[datetime], Optional[datetime]]:
+    """
+    Check both the wind and the ocean NC files (primary path first, then
+    the fallback path).
+
+    Parameters
+    ----------
+    date_obj : datetime
+        Starting date
+    primary_wind_path : str, optional
+        Primary wind data path (default: /storage/pos_wind)
+    fallback_wind_path : str, optional
+        Fallback wind data path (default: /storage/wind)
+    primary_hydr_path : str, optional
+        Primary ocean data path (default: /storage/pos_hydr)
+    fallback_hydr_path : str, optional
+        Fallback ocean data path (default: /storage/hydr)
+
+    Returns
+    -------
+    wind_nc_path : str or None
+    ocean_nc_path : str or None
+    wind_date : datetime or None
+    ocean_date : datetime or None
+    """
+    if primary_wind_path is None:
+        primary_wind_path = str(STORAGE.POS_WIND)
+    if fallback_wind_path is None:
+        fallback_wind_path = str(STORAGE.WIND)
+    if primary_hydr_path is None:
+        primary_hydr_path = str(STORAGE.POS_HYDR)
+    if fallback_hydr_path is None:
+        fallback_hydr_path = str(STORAGE.HYDR)
+
+    # Wind file
+    wind_nc_path, wind_date = check_nc_file_by_date(primary_wind_path, date_obj)
+    if not wind_nc_path:
+        wind_nc_path, wind_date = check_nc_file_by_date(fallback_wind_path, date_obj)
+
+    # Ocean file
+    ocean_nc_path, ocean_date = check_nc_file_by_date(primary_hydr_path, date_obj)
+    if not ocean_nc_path:
+        ocean_nc_path, ocean_date = check_nc_file_by_date(fallback_hydr_path, date_obj)
+
+    return wind_nc_path, ocean_nc_path, wind_date, ocean_date
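+
+# Example (illustrative; assumes the default STORAGE paths exist):
+#   wind_nc, ocean_nc, wind_dt, ocean_dt = check_nc_files_for_date(
+#       datetime(2024, 1, 15))
+# If neither the primary nor the fallback path yields a file within
+# FILE_FALLBACK_DAYS, the corresponding return values are None.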
+
+
+def check_img_file_by_date(date_obj: datetime, img_type: str,
+                           max_attempts: int = None) -> Tuple[Optional[str], Optional[datetime]]:
+    """
+    Check whether an image folder exists for the given date; if not, retry
+    with earlier dates.
+
+    Parameters
+    ----------
+    date_obj : datetime
+        Starting date
+    img_type : str
+        Image type (e.g. "wind", "hydr", "pos_wind", "pos_hydr")
+    max_attempts : int, optional
+        Maximum number of attempts (default: FILE_FALLBACK_DAYS)
+
+    Returns
+    -------
+    folder_path : str or None
+        Path of the folder found, or None
+    final_date : datetime or None
+        Date actually used, or None
+    """
+    if max_attempts is None:
+        max_attempts = SIM.FILE_FALLBACK_DAYS
+
+    for _ in range(max_attempts):
+        date_str = date_obj.strftime("%Y%m%d")
+        dir_path = os.path.join(f"/storage/{img_type}", date_str)
+
+        if not os.path.exists(dir_path):
+            date_obj -= timedelta(days=1)
+            continue
+
+        file_path = os.path.join(dir_path, "visual_image")
+
+        if os.path.exists(file_path):
+            return file_path, date_obj
+
+        date_obj -= timedelta(days=1)
+
+    return None, None
+
+
+def kst_to_utc(dt: datetime) -> datetime:
+    """Convert a KST time to UTC."""
+    return dt - timedelta(hours=SIM.TIMEZONE_OFFSET_HOURS)
+
+
+def utc_to_kst(dt: datetime) -> datetime:
+    """Convert a UTC time to KST."""
+    return dt + timedelta(hours=SIM.TIMEZONE_OFFSET_HOURS)
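+
+
+if __name__ == "__main__":
+    # Minimal self-check of the pure helpers (no file system access);
+    # the coordinates and times are arbitrary example values.
+    d = haversine_distance(126.0, 36.0, 127.0, 36.0)
+    print(f"One degree of longitude at 36N is roughly {d:.1f} km")
+
+    kst = datetime(2024, 1, 15, 12, 0, 0)
+    print("UTC:", kst_to_utc(kst))  # 2024-01-15 03:00:00 if the offset is 9 h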