레슨 9 / 10·5개 토픽

모델 저장과 최적화

학습된 모델을 저장하고 불러오는 방법과 브라우저 환경에서의 성능 최적화 기법을 다룹니다. 메모리 관리, 백엔드 선택, 배치 처리 등 실전에서 필수적인 최적화 전략을 학습합니다.

model.save() 저장 방식

javascript

import * as tf from '@tensorflow/tfjs';

// 1. Save to localStorage (small models only; roughly a 5MB budget)
await model.save('localstorage://my-model');

// 2. Save to IndexedDB (recommended for large models, up to hundreds of MB)
await model.save('indexeddb://my-model');

// 3. Trigger a browser download (my-model.json + my-model.weights.bin)
await model.save('downloads://my-model');

// 4. Upload to an HTTP server
await model.save('http://localhost:3000/api/save-model');

// List the models currently saved in browser storage
const models = await tf.io.listModels();
console.log(models);
// { 'localstorage://my-model': { dateSaved, modelTopologyBytes, ... } }

// Copy a model to another storage medium.
// This has to happen while the source still exists, so it runs BEFORE the
// removal below; copying after removing the source would reject.
await tf.io.copyModel(
  'localstorage://my-model',
  'indexeddb://my-model-backup'
);

// Remove a saved model (the IndexedDB backup made above is kept)
await tf.io.removeModel('localstorage://my-model');

tf.loadLayersModel() 모델 로드

javascript

// Load from localStorage
const model = await tf.loadLayersModel('localstorage://my-model');

// Load from IndexedDB
const model2 = await tf.loadLayersModel('indexeddb://my-model');

// Load over HTTP(S) from a statically hosted model.json
const hostedModelUrl = 'https://example.com/models/my-model/model.json';
const model3 = await tf.loadLayersModel(hostedModelUrl);

// Load while reporting progress.
// `fraction` is scaled by 100 here to produce a percentage.
// NOTE(review): `progressBar` is not defined in this snippet; presumably a
// DOM element obtained elsewhere on the page.
const reportLoadProgress = (fraction) => {
  const percent = fraction * 100;
  console.log("로딩: " + percent.toFixed(0) + "%");
  progressBar.style.width = percent + "%";
};
const model4 = await tf.loadLayersModel(hostedModelUrl, {
  onProgress: reportLoadProgress,
});

// Load a GraphModel (a model converted from a TF SavedModel)
const graphModelUrl = 'https://example.com/models/tfjs-model/model.json';
const graphModel = await tf.loadGraphModel(graphModelUrl);

tf.tidy() 메모리 관리

javascript

// tf.tidy() releases the tensors created inside the callback, except the
// one that the callback returns.
const result = tf.tidy(() => {
  const left = tf.tensor([1, 2, 3]);
  const right = tf.tensor([4, 5, 6]);
  const sum = left.add(right);   // intermediate tensor
  const factor = tf.scalar(2);   // intermediate tensor
  // Only the returned product is kept; left, right, sum and factor are
  // released automatically when the callback finishes.
  return sum.mul(factor);
});

// Inspect the current memory state
console.log(tf.memory());
// { numTensors: 1, numDataBuffers: 1, numBytes: 12, ... }
// (these numbers assume no other tensors are alive at this point)

// Note: tf.tidy() cannot wrap async code.
// For asynchronous work, release tensors manually with dispose().

/**
 * Runs a single prediction and returns the output values, releasing both the
 * input tensor and the prediction tensor afterwards.
 *
 * Cleanup is done in `finally` blocks so the tensors are released even when
 * `model.predict()` throws or `prediction.data()` rejects.
 *
 * @param {object} model - Object exposing `predict(tensor)`; the result must
 *   expose `data()` and `dispose()`.
 * @param {*} input - Values passed straight to `tf.tensor()`.
 * @returns {Promise<*>} Whatever `prediction.data()` resolves to.
 */
async function predictWithCleanup(model, input) {
  const tensor = tf.tensor(input);
  try {
    const prediction = model.predict(tensor);
    try {
      // Await inside the try so the finally blocks run after the download.
      return await prediction.data();
    } finally {
      prediction.dispose();
    }
  } finally {
    tensor.dispose();
  }
}

// Leak debugging: compare the live tensor count before and after some work
const tensorsBefore = tf.memory().numTensors;
// ... perform the work under inspection ...
const leakedCount = tf.memory().numTensors - tensorsBefore;
if (leakedCount > 0) {
  console.warn("텐서 누수 감지:", leakedCount, "개");
}

tf.setBackend() 백엔드 선택

javascript

// Extra setup required by the WASM backend.
// Importing the package registers the 'wasm' backend; one import statement
// covers both the registration and the named helpers.
import { setWasmPaths, version_wasm } from '@tensorflow/tfjs-backend-wasm';

// The .wasm binary location must be configured BEFORE the first
// tf.setBackend('wasm') / tf.ready() call. The URL is pinned to the installed
// package version so the binaries match the JavaScript glue code.
setWasmPaths(
  `https://cdn.jsdelivr.net/npm/@tensorflow/tfjs-backend-wasm@${version_wasm}/dist/`
);

// Log the backend that is currently active
console.log("현재 백엔드:", tf.getBackend());

// Switching backends (preference order: webgl > wasm > cpu)
await tf.setBackend('webgl');   // GPU accelerated (default, fastest)
await tf.setBackend('wasm');    // WebAssembly (for environments without a GPU)
await tf.setBackend('cpu');     // CPU (slowest, fallback)

// Finish on the WASM backend and wait until it is fully initialized
await tf.setBackend('wasm');
await tf.ready();

// Per-backend inference benchmark

/**
 * Measures the average latency of `model.predict()` on the given backend and
 * logs it as "<backend>: <ms>ms/회".
 *
 * One untimed warm-up inference runs first so one-off first-run cost is not
 * folded into the average. The input tensor and every prediction are released
 * even if an inference fails.
 *
 * NOTE(review): `model` is not a parameter; it is read from the enclosing
 * scope, as in the original snippet.
 *
 * @param {string} backendName - Backend to activate, e.g. 'webgl', 'wasm', 'cpu'.
 * @param {number} [runs=10] - Number of timed inferences; positive integer.
 * @returns {Promise<void>}
 * @throws {RangeError} If `runs` is not a positive integer.
 * @throws {Error} If `tf.setBackend()` reports that the backend was not set.
 */
async function benchmark(backendName, runs = 10) {
  if (!Number.isInteger(runs) || runs <= 0) {
    throw new RangeError("runs must be a positive integer");
  }

  // setBackend() resolves to false when the backend could not be set;
  // timing anyway would report numbers under the wrong backend name.
  const wasSet = await tf.setBackend(backendName);
  if (wasSet === false) {
    throw new Error("Backend '" + backendName + "' could not be initialized");
  }
  await tf.ready();

  const input = tf.randomNormal([1, 224, 224, 3]);
  try {
    const runOnce = async () => {
      const pred = model.predict(input);
      try {
        // Awaiting the output makes each run wait for its result.
        await pred.data();
      } finally {
        pred.dispose();
      }
    };

    await runOnce(); // untimed warm-up

    const start = performance.now();
    for (let i = 0; i < runs; i++) {
      await runOnce();
    }
    const elapsed = performance.now() - start;
    console.log(backendName + ": " + (elapsed / runs).toFixed(1) + "ms/회");
  } finally {
    input.dispose();
  }
}

배치 처리와 성능 최적화

javascript

// 1. Batch prediction for higher throughput

/**
 * Runs one prediction over a whole batch of images.
 *
 * Each image is converted with `tf.browser.fromPixels`, resized to 224x224
 * and divided by 255, then all of them are stacked into a single batch.
 * The conversion runs inside `tf.tidy()` so the per-image tensors created
 * along the way are released; only the stacked batch survives, and it is
 * disposed together with the prediction once the results have been read.
 *
 * @param {object} model - Object exposing `predict(batch)`.
 * @param {Array} images - Image sources accepted by `tf.browser.fromPixels`.
 * @returns {Promise<Array>} Result of `predictions.array()`; an empty array
 *   when `images` is empty (there is nothing to stack in that case).
 */
async function batchPredict(model, images) {
  if (images.length === 0) {
    return [];
  }

  // Combine the individual images into one batch without leaking the
  // intermediate per-image tensors.
  const batch = tf.tidy(() =>
    tf.stack(images.map((img) =>
      tf.browser.fromPixels(img).resizeBilinear([224, 224]).div(255.0)
    ))
  );

  try {
    const predictions = model.predict(batch);
    try {
      return await predictions.array();
    } finally {
      predictions.dispose();
    }
  } finally {
    batch.dispose();
  }
}

// 2. Keep the UI responsive with tf.nextFrame()

/**
 * Trains the model one epoch at a time, yielding a frame after every epoch
 * and then writing the progress and that epoch's loss to `statusEl`.
 *
 * NOTE(review): `statusEl` is not defined in this snippet; presumably a DOM
 * element obtained elsewhere on the page.
 *
 * @param {object} model - Object exposing `fit(xs, ys, options)`.
 * @param {*} xs - Training inputs, passed through to `model.fit`.
 * @param {*} ys - Training targets, passed through to `model.fit`.
 * @param {number} epochs - Number of single-epoch `fit` calls to make.
 * @returns {Promise<void>}
 */
async function trainWithUIUpdate(model, xs, ys, epochs) {
  let completed = 0;
  while (completed < epochs) {
    const { history: metrics } = await model.fit(xs, ys, {
      epochs: 1,
      batchSize: 32,
    });
    completed += 1;

    // Yield a frame before writing the status text.
    await tf.nextFrame();

    const lossText = metrics.loss[0].toFixed(4);
    statusEl.textContent = `에포크 ${completed}/${epochs} - loss: ${lossText}`;
  }
}

// 3. WebGL texture caching threshold
// Sets the WEBGL_DELETE_TEXTURE_THRESHOLD environment flag to 0.
// NOTE(review): per the TF.js WebGL flag definitions, the default of -1 means
// textures are never deleted (they are kept for reuse), while a value of 0
// appears to make the backend delete textures on disposal instead of caching
// them, trading some speed for lower GPU memory. Confirm this is the intent.
tf.env().set('WEBGL_DELETE_TEXTURE_THRESHOLD', 0);

// 4. Model warm-up (the first inference is slow, so run a dummy input ahead of time)

/**
 * Runs one throwaway inference on an all-zeros input so that later
 * predictions do not pay the first-run cost.
 *
 * The dummy input and its output are released in `finally` blocks, so they
 * are freed even when `model.predict()` throws or `data()` rejects. The
 * completion message is logged only when the warm-up succeeds.
 *
 * @param {object} model - Object exposing `predict(tensor)`.
 * @param {number[]} inputShape - Input shape without the batch dimension;
 *   a leading batch dimension of 1 is prepended.
 * @returns {Promise<void>}
 */
async function warmupModel(model, inputShape) {
  const dummy = tf.zeros([1, ...inputShape]);
  try {
    const warmup = model.predict(dummy);
    try {
      // Wait for the output so the inference has actually run.
      await warmup.data();
    } finally {
      warmup.dispose();
    }
  } finally {
    dummy.dispose();
  }
  console.log("모델 워밍업 완료");
}

💡

tf.tidy()는 동기 코드에서만 사용할 수 있으며, 비동기 작업에서는 dispose()를 직접 호출해야 합니다. WebGL 백엔드가 가장 빠르지만 모바일에서는 WASM이 더 안정적일 수 있습니다. tf.memory()로 텐서 누수를 주기적으로 확인하고, tf.nextFrame()으로 긴 연산 중 UI 프리징을 방지하세요.

목록 ←객체 감지와 자연어 처리 다음: TensorFlow.js 레퍼런스→