레슨 8 / 10 · 4개 토픽

객체 감지와 자연어 처리

TensorFlow.js의 사전학습 모델을 활용하면 객체 감지, 자세 추정, 텍스트 유사도 분석, 콘텐츠 검열 등 다양한 AI 기능을 브라우저에서 바로 구현할 수 있습니다. 이 레슨에서는 COCO-SSD, PoseNet/MoveNet, Universal Sentence Encoder, Toxicity 모델을 실습합니다.

COCO-SSD 실시간 객체 감지

javascript

import * as cocoSsd from '@tensorflow-models/coco-ssd';

// Load the COCO-SSD detector with its default options.
const model = await cocoSsd.load();

// Look up the element that supplies frames (`#webcam`) and the canvas used
// as the drawing overlay (`#overlay`), then take the overlay's 2D context.
const video = document.querySelector('#webcam');
const canvas = document.querySelector('#overlay');
const ctx = canvas.getContext('2d');

/**
 * Runs one detection pass on the current video frame, redraws the overlay,
 * and schedules the next pass with `requestAnimationFrame`.
 *
 * Uses the surrounding `model`, `video`, `canvas` and `ctx` bindings. Every
 * prediction is drawn as a green rectangle with a "<class> <score>%" label.
 *
 * A failed `model.detect` call is logged and ends the loop. The function is
 * re-invoked by `requestAnimationFrame`, which ignores the returned promise,
 * so a rejection would otherwise only show up as an unhandled rejection.
 *
 * @returns {Promise<void>} Resolves after the frame has been drawn, or after
 *   a detection failure has been logged.
 */
async function detectFrame() {
  let predictions;
  try {
    predictions = await model.detect(video);
  } catch (err) {
    console.error('Object detection failed; stopping the detection loop.', err);
    return;
  }

  // Remove the boxes drawn for the previous frame.
  ctx.clearRect(0, 0, canvas.width, canvas.height);

  // The drawing state is identical for every prediction: set it once per frame.
  ctx.strokeStyle = '#00ff00';
  ctx.lineWidth = 2;
  ctx.fillStyle = '#00ff00';
  ctx.font = '14px Arial';

  for (const { bbox, class: label, score } of predictions) {
    const [x, y, width, height] = bbox;
    ctx.strokeRect(x, y, width, height);

    // Place the label just above the box; when the box is within 15px of the
    // top edge there is no room above, so draw it inside the box instead.
    const labelY = y > 15 ? y - 5 : y + 15;
    ctx.fillText(`${label} ${(score * 100).toFixed(0)}%`, x, labelY);
  }

  requestAnimationFrame(detectFrame);
}

// Request the webcam, attach the stream to the video element, and start the
// detection loop once the first frame has loaded.
const stream = await navigator.mediaDevices.getUserMedia({ video: true });
video.srcObject = stream;
video.onloadeddata = () => {
  // Per the COCO-SSD docs, bbox values are pixel coordinates within the input
  // frame. Give the overlay the same pixel size as the video so the boxes
  // drawn by detectFrame line up with the picture instead of being clipped.
  canvas.width = video.videoWidth;
  canvas.height = video.videoHeight;

  // The event handler cannot await, so report failures explicitly rather than
  // leaving the promise floating.
  detectFrame().catch((err) => {
    console.error('Detection loop failed.', err);
  });
};

PoseNet/MoveNet 자세 추정

javascript

import * as poseDetection from '@tensorflow-models/pose-detection';

// Build a MoveNet detector using the SinglePose Lightning variant
// (described by the lesson as the fast, lightweight option).
const detectorConfig = {
  modelType: poseDetection.movenet.modelType.SINGLEPOSE_LIGHTNING,
};
const detector = await poseDetection.createDetector(
  poseDetection.SupportedModels.MoveNet,
  detectorConfig
);

// Estimate poses for the current video frame.
const poses = await detector.estimatePoses(video);

if (poses.length > 0) {
  // Only keypoints scoring strictly above this value are drawn.
  const minScore = 0.3;
  const { keypoints } = poses[0];

  // Mark every sufficiently confident keypoint with a red dot and its name.
  for (const { x, y, score, name } of keypoints) {
    if (!(score > minScore)) {
      continue;
    }
    ctx.beginPath();
    ctx.arc(x, y, 5, 0, 2 * Math.PI);
    ctx.fillStyle = '#ff0000';
    ctx.fill();
    ctx.fillText(name, x + 8, y);
  }

  // Draw the skeleton: a cyan line between each adjacent keypoint pair whose
  // two endpoints both pass the score threshold.
  const skeleton = poseDetection.util.getAdjacentPairs(
    poseDetection.SupportedModels.MoveNet
  );
  for (const [fromIndex, toIndex] of skeleton) {
    const start = keypoints[fromIndex];
    const end = keypoints[toIndex];
    if (!(start.score > minScore && end.score > minScore)) {
      continue;
    }
    ctx.beginPath();
    ctx.moveTo(start.x, start.y);
    ctx.lineTo(end.x, end.y);
    ctx.strokeStyle = '#00ffff';
    ctx.lineWidth = 2;
    ctx.stroke();
  }
}

Universal Sentence Encoder 텍스트 유사도

javascript

import * as use from '@tensorflow-models/universal-sentence-encoder';

// Load the Universal Sentence Encoder model.
const model = await use.load();

// Sentences to embed: two about the weather and one about the stock market.
const sentences = [
  '오늘 날씨가 좋습니다',
  '오늘 날씨가 화창합니다',
  '주식 시장이 하락했습니다',
];

// Embed all sentences in one call and copy the values into plain nested
// arrays (one vector per sentence).
const embeddings = await model.embed(sentences);
const vectors = await embeddings.array();

// TensorFlow.js tensors are not reclaimed by the garbage collector; release
// the embedding tensor now that its values have been copied out.
embeddings.dispose();

/**
 * Computes the cosine similarity of two numeric vectors.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector; must have the same length as `a`.
 * @returns {number} dot(a, b) / (|a| * |b|). Returns 0 when either vector has
 *   zero magnitude (including two empty vectors), where the ratio would
 *   otherwise be 0/0 = NaN.
 * @throws {RangeError} If the vectors differ in length. Without this check the
 *   result is NaN or silently wrong (e.g. [1] vs [1, 1] would yield 1).
 */
function cosineSimilarity(a, b) {
  if (a.length !== b.length) {
    throw new RangeError(
      `cosineSimilarity: vector lengths differ (${a.length} vs ${b.length})`
    );
  }

  let dotProduct = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dotProduct += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }

  const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
  // A zero vector has no direction; report "no similarity" instead of NaN.
  return magnitude === 0 ? 0 : dotProduct / magnitude;
}

// Compare the first sentence's vector against each of the other two.
const [referenceVector, similarVector, unrelatedVector] = vectors;
const sim01 = cosineSimilarity(referenceVector, similarVector);
const sim02 = cosineSimilarity(referenceVector, unrelatedVector);

// Print both scores to four decimal places.
// NOTE(review): the lesson annotates the first score as high (~0.85) and the
// second as low (~0.15); these figures cannot be checked from this code —
// confirm them against the loaded model for these Korean sentences.
console.log("날씨-화창 유사도:", sim01.toFixed(4));
console.log("날씨-주식 유사도:", sim02.toFixed(4));

Toxicity 모델 콘텐츠 검열

javascript

import * as toxicity from '@tensorflow-models/toxicity';

// Load the toxicity classifier; the argument is the threshold, i.e. the
// probability used as the decision cut-off.
const model = await toxicity.load(0.9);

const sentences = [
  'You are a wonderful person!',
  'I will destroy you completely!',
  'Have a great day!',
];

// Classify every sentence; the output has one entry per category.
const predictions = await model.classify(sentences);

// Turns a result's `match` value into the verdict text printed per sentence:
// truthy -> toxic, exactly false -> safe, anything else -> undecided.
const describeMatch = (match) => {
  if (match) {
    return "독성";
  }
  if (match === false) {
    return "안전";
  }
  return "판단 불가";
};

for (const { label, results } of predictions) {
  console.log("카테고리:", label);
  for (const [index, { match, probabilities }] of results.entries()) {
    // probabilities[1] is the value reported as the percentage.
    const percent = (probabilities[1] * 100).toFixed(1);
    console.log(
      `  ${sentences[index]} -> ${describeMatch(match)} (확률: ${percent}%)`
    );
  }
}
// Categories: identity_attack, insult, obscene,
//             severe_toxicity, sexual_explicit, threat, toxicity

💡

COCO-SSD는 80개 카테고리의 일상 객체를 감지할 수 있으며, MoveNet은 17개 신체 키포인트를 실시간으로 추정합니다. Universal Sentence Encoder는 512차원 벡터로 문장을 임베딩하여 의미적 유사도를 비교할 수 있습니다. 모든 모델은 최초 로드 시 네트워크 다운로드가 필요하므로 로딩 UI를 제공하세요.

목록 | ← 실전: 손글씨 인식 앱 | 다음: 모델 저장과 최적화 →