👁️‍🗨️ Vision Framework

Analyzing images and video with computer vision

Supports iOS 11+ and visionOS

✨ What is Vision?

Vision is Apple's computer vision framework. It provides powerful image and video analysis features such as face detection, text recognition, barcode scanning, and object tracking, and it integrates with Core ML so you can also run custom machine learning models.

💡 Key features: face/body detection · text recognition (OCR) · barcode scanning · object tracking · contour detection · image alignment · Core ML integration
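
Every Vision task follows the same three-step pattern: create a request, run it through a handler bound to a single image (VNImageRequestHandler) or a frame sequence (VNSequenceRequestHandler), and read the typed observations from request.results. A minimal sketch of that pattern (the file and function names are just for illustration):

VisionBasics.swift – The request/handler pattern (illustrative example)
import Vision
import UIKit

// The common Vision pattern: request → handler → results
func countFaces(in image: UIImage) -> Int {
    guard let cgImage = image.cgImage else { return 0 }

    // 1. Describe what to analyze
    let request = VNDetectFaceRectanglesRequest()

    // 2. Bind a handler to the image and perform the request (synchronous)
    let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
    try? handler.perform([request])

    // 3. Read typed observations from the request
    return request.results?.count ?? 0
}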

🎯 1. Face Detection

Finds faces in an image and detects facial landmarks (eyes, nose, mouth, and so on).

FaceDetector.swift – Face detection
import Vision
import UIKit

@Observable
class FaceDetector {
    var detectedFaces: [VNFaceObservation] = []
    var isProcessing = false
    var errorMessage: String?

    func detectFaces(in image: UIImage) async {
        isProcessing = true
        errorMessage = nil

        guard let cgImage = image.cgImage else {
            errorMessage = "Failed to convert image"
            isProcessing = false
            return
        }

        // Face detection request
        let request = VNDetectFaceRectanglesRequest { [weak self] request, error in
            guard let observations = request.results as? [VNFaceObservation] else {
                self?.errorMessage = error?.localizedDescription ?? "Face detection failed"
                return
            }
            self?.detectedFaces = observations
        }

        // Perform request
        let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
        do {
            try handler.perform([request])
        } catch {
            errorMessage = error.localizedDescription
        }

        isProcessing = false
    }

    // Detect face landmarks (eyes, nose, mouth, etc.)
    func detectFaceLandmarks(in image: UIImage) async -> [VNFaceObservation]? {
        guard let cgImage = image.cgImage else { return nil }

        let request = VNDetectFaceLandmarksRequest()
        let handler = VNImageRequestHandler(cgImage: cgImage)

        try? handler.perform([request])
        return request.results as? [VNFaceObservation]
    }
}
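
detectFaceLandmarks(in:) returns VNFaceObservation values whose landmarks property exposes normalized point groups (eyes, nose, mouth, and so on). A minimal sketch of reading one group back out (the helper name is illustrative):

FaceLandmarkReader.swift – Reading landmark points (illustrative example)
import Vision

// Collect the left-eye landmark points from face observations.
// normalizedPoints are relative to each face's bounding box (0...1, origin bottom-left).
func leftEyePoints(from observations: [VNFaceObservation]) -> [[CGPoint]] {
    observations.compactMap { $0.landmarks?.leftEye?.normalizedPoints }
}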
FaceDetectionView.swift – SwiftUI integration
import SwiftUI

struct FaceDetectionView: View {
    @State private var detector = FaceDetector()
    @State private var selectedImage: UIImage?
    @State private var showImagePicker = false

    var body: some View {
        VStack(spacing: 20) {
            if let image = selectedImage {
                Image(uiImage: image)
                    .resizable()
                    .scaledToFit()
                    .frame(height: 300)
                    .overlay {
                        // Draw a rectangle over each detected face
                        GeometryReader { geo in
                            ForEach(detector.detectedFaces.indices, id: \.self) { index in
                                let face = detector.detectedFaces[index]
                                let boundingBox = face.boundingBox

                                Rectangle()
                                    .stroke(Color.green, lineWidth: 3)
                                    .frame(
                                        width: boundingBox.width * geo.size.width,
                                        height: boundingBox.height * geo.size.height
                                    )
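                                    // Vision's boundingBox has a bottom-left origin, so flip the Y axis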
                                    .position(
                                        x: boundingBox.midX * geo.size.width,
                                        y: (1 - boundingBox.midY) * geo.size.height
                                    )
                            }
                        }
                    }

                Text("Detected faces: \(detector.detectedFaces.count)")
                    .font(.headline)
            }

            Button("Select Photo") {
                showImagePicker = true
            }
            .buttonStyle(.borderedProminent)

            if detector.isProcessing {
                ProgressView("Detecting faces...")
            }

            if let error = detector.errorMessage {
                Text(error)
                    .foregroundStyle(.red)
            }
        }
        .padding()
        .sheet(isPresented: $showImagePicker) {
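            // ImagePicker is assumed to be a custom UIViewControllerRepresentable wrapper (not shown here)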
            ImagePicker(image: $selectedImage)
        }
        .onChange(of: selectedImage) { _, newImage in
            if let image = newImage {
                Task {
                    await detector.detectFaces(in: image)
                }
            }
        }
    }
}
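
The overlay above converts Vision's normalized, bottom-left-origin bounding box by hand. When you instead need the box in pixel coordinates of the source image, Vision ships a helper for the same conversion; a short sketch (the function name is illustrative):

FaceGeometry.swift – Normalized-to-pixel conversion (illustrative example)
import Vision

// Convert a normalized Vision bounding box into pixel coordinates of the source image.
// Note: the result still uses a bottom-left origin.
func pixelRect(for observation: VNFaceObservation, imageWidth: Int, imageHeight: Int) -> CGRect {
    VNImageRectForNormalizedRect(observation.boundingBox, imageWidth, imageHeight)
}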

📝 2. Text Recognition (OCR)

Automatically detects and recognizes text in images, with multi-language support.

TextRecognizer.swift – OCR implementation
import Vision
import UIKit

@Observable
class TextRecognizer {
    var recognizedText: String = ""
    var textObservations: [VNRecognizedTextObservation] = []

    func recognizeText(in image: UIImage) async {
        guard let cgImage = image.cgImage else { return }

        let request = VNRecognizeTextRequest { [weak self] request, error in
            guard let observations = request.results as? [VNRecognizedTextObservation] else {
                return
            }

            self?.textObservations = observations

            // Join all recognized strings
            let recognizedStrings = observations.compactMap { observation in
                observation.topCandidates(1).first?.string
            }

            self?.recognizedText = recognizedStrings.joined(separator: "\n")
        }

        // Recognition level (.accurate favors accuracy, .fast favors speed)
        request.recognitionLevel = .accurate

        // Multi-language support (Korean recognition requires iOS 16 or later)
        request.recognitionLanguages = ["ko-KR", "en-US"]

        // Custom words improve recognition of domain-specific terms
        request.customWords = ["SwiftUI", "Vision", "iOS"]

        let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
        try? handler.perform([request])
    }

    // Recognize text only in a specific region (normalized coordinates, origin at bottom-left)
    func recognizeText(in image: UIImage, region: CGRect) async -> String? {
        guard let cgImage = image.cgImage else { return nil }

        let request = VNRecognizeTextRequest()
        request.regionOfInterest = region

        let handler = VNImageRequestHandler(cgImage: cgImage)
        try? handler.perform([request])

        return request.results?.first?.topCandidates(1).first?.string
    }
}
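
Each VNRecognizedTextObservation also carries geometry, which is handy for highlighting matches on screen. One way to pair every recognized string with a bounding box is to ask the top candidate for the box of its full character range; a minimal sketch (the helper name is illustrative):

TextGeometry.swift – Recognized text with bounding boxes (illustrative example)
import Vision

// Pair each recognized string with a normalized bounding box
func textBoxes(from observations: [VNRecognizedTextObservation]) -> [(String, CGRect)] {
    observations.compactMap { observation -> (String, CGRect)? in
        guard let candidate = observation.topCandidates(1).first else { return nil }

        // boundingBox(for:) can return a tighter box for the given character range
        let fullRange = candidate.string.startIndex..<candidate.string.endIndex
        if let rectObservation = try? candidate.boundingBox(for: fullRange) {
            return (candidate.string, rectObservation.boundingBox)
        }

        // Fall back to the observation's own bounding box
        return (candidate.string, observation.boundingBox)
    }
}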

📷 3. Barcode/QR Code Scanning

Recognizes barcodes and QR codes from images or a live camera feed.

BarcodeScanner.swift – Barcode scanning
import Vision
import AVFoundation
import UIKit

@Observable
class BarcodeScanner {
    var detectedCodes: [VNBarcodeObservation] = []

    func scanBarcodes(in image: UIImage) async -> [String] {
        guard let cgImage = image.cgImage else { return [] }

        let request = VNDetectBarcodesRequest { [weak self] request, _ in
            guard let results = request.results as? [VNBarcodeObservation] else { return }
            self?.detectedCodes = results
        }

        // Scan only specific symbologies (QR, EAN-13, Code 128, etc.)
        request.symbologies = [.qr, .ean13, .code128]

        let handler = VNImageRequestHandler(cgImage: cgImage)
        try? handler.perform([request])

        return detectedCodes.compactMap { $0.payloadStringValue }
    }

    // For real-time camera scanning
    func processCameraFrame(_ sampleBuffer: CMSampleBuffer) {
        guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else { return }

        let request = VNDetectBarcodesRequest { [weak self] request, _ in
            guard let results = request.results as? [VNBarcodeObservation] else { return }
            self?.detectedCodes = results
        }

        let handler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer, options: [:])
        try? handler.perform([request])
    }
}
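
processCameraFrame(_:) expects frames from an AVCaptureSession video output. A minimal sketch of that wiring, assuming the rest of the capture session (inputs, session start) is configured elsewhere; the class and queue names are illustrative:

CameraFrameFeeder.swift – Feeding camera frames to the scanner (illustrative example)
import AVFoundation

// Delivers live camera frames to BarcodeScanner.processCameraFrame(_:)
final class CameraFrameFeeder: NSObject, AVCaptureVideoDataOutputSampleBufferDelegate {
    let scanner = BarcodeScanner()

    func attach(to session: AVCaptureSession) {
        let output = AVCaptureVideoDataOutput()
        // Receive frames on a background queue so the UI stays responsive
        output.setSampleBufferDelegate(self, queue: DispatchQueue(label: "camera.frames"))
        if session.canAddOutput(output) {
            session.addOutput(output)
        }
    }

    // Called once per captured video frame
    func captureOutput(_ output: AVCaptureOutput,
                       didOutput sampleBuffer: CMSampleBuffer,
                       from connection: AVCaptureConnection) {
        scanner.processCameraFrame(sampleBuffer)
    }
}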

🎯 4. Object Tracking

Tracks a specific object across video frames.

ObjectTracker.swift – Object tracking
import Vision

@Observable
class ObjectTracker {
    // Reuse a single sequence handler so tracking state carries across frames
    private let sequenceHandler = VNSequenceRequestHandler()
    private var trackingRequest: VNTrackObjectRequest?
    var trackedObject: VNDetectedObjectObservation?

    // Start tracking from the object's initial position
    func startTracking(initialObservation: VNDetectedObjectObservation) {
        trackingRequest = VNTrackObjectRequest(detectedObjectObservation: initialObservation)
        trackingRequest?.trackingLevel = .accurate
    }

    // Process the next frame
    func processFrame(_ pixelBuffer: CVPixelBuffer) -> CGRect? {
        guard let request = trackingRequest else { return nil }

        try? sequenceHandler.perform([request], on: pixelBuffer)

        guard let observation = request.results?.first as? VNDetectedObjectObservation else {
            return nil
        }

        // Accept the result only if confidence is high enough,
        // and feed it back in as the seed for the next frame
        if observation.confidence > 0.5 {
            trackedObject = observation
            request.inputObservation = observation
            return observation.boundingBox
        }

        return nil
    }

    func stopTracking() {
        trackingRequest = nil
        trackedObject = nil
    }
}
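
startTracking(initialObservation:) needs a seed observation. It can come from another Vision detection request, or you can build one from a rectangle the user selects, expressed in normalized image coordinates; a short sketch (the function name is illustrative):

TrackingSeed.swift – Seeding the tracker from a selection (illustrative example)
import Vision

// Build a seed observation from a normalized rect (0...1, origin at bottom-left)
// and hand it to the tracker
func beginTracking(with tracker: ObjectTracker, normalizedRect: CGRect) {
    let seed = VNDetectedObjectObservation(boundingBox: normalizedRect)
    tracker.startTracking(initialObservation: seed)
}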

🔍 5. Image Analysis

Analyzes image attributes such as brightness, focus, and the horizon angle.

ImageAnalyzer.swift – Image analysis
import Vision
import UIKit

struct ImageAnalysisResult {
    let brightness: Float
    let contrast: Float
    let horizonAngle: CGFloat?
    let isFocused: Bool
}

@Observable
class ImageAnalyzer {
    func analyzeImage(_ image: UIImage) async -> ImageAnalysisResult? {
        guard let cgImage = image.cgImage else { return nil }

        // Detect the horizon angle; VNClassifyImageRequest adds scene labels
        // (its results are not consumed in this simplified example)
        let featureRequest = VNDetectHorizonRequest()
        let qualityRequest = VNClassifyImageRequest()

        let handler = VNImageRequestHandler(cgImage: cgImage)
        try? handler.perform([featureRequest, qualityRequest])

        let horizonAngle = featureRequest.results?.first?.angle

        // Brightness, contrast, and focus are placeholder values here;
        // Vision does not report them directly
        return ImageAnalysisResult(
            brightness: 0.7,
            contrast: 0.8,
            horizonAngle: horizonAngle,
            isFocused: true
        )
    }

    // Contour detection
    func detectContours(in image: UIImage) async -> [VNContoursObservation]? {
        guard let cgImage = image.cgImage else { return nil }

        let request = VNDetectContoursRequest()
        request.contrastAdjustment = 2.0
        request.detectsDarkOnLight = true

        let handler = VNImageRequestHandler(cgImage: cgImage)
        try? handler.perform([request])

        return request.results
    }
}
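
Because Vision does not report brightness directly, the values above stay placeholders. If you need a real estimate, one common approach outside Vision is Core Image's CIAreaAverage filter, which averages the whole image down to a single pixel; a sketch under that assumption (the function name is illustrative):

BrightnessEstimator.swift – Average brightness via Core Image (illustrative example)
import CoreImage
import UIKit

// Average luminance of an image using CIAreaAverage, in the range 0.0...1.0
func averageBrightness(of image: UIImage) -> Float? {
    guard let ciImage = CIImage(image: image),
          let filter = CIFilter(name: "CIAreaAverage", parameters: [
              kCIInputImageKey: ciImage,
              kCIInputExtentKey: CIVector(cgRect: ciImage.extent)
          ]),
          let output = filter.outputImage else { return nil }

    // Render the single averaged pixel into a 4-byte RGBA buffer
    var pixel = [UInt8](repeating: 0, count: 4)
    CIContext().render(output,
                       toBitmap: &pixel,
                       rowBytes: 4,
                       bounds: CGRect(x: 0, y: 0, width: 1, height: 1),
                       format: .RGBA8,
                       colorSpace: CGColorSpaceCreateDeviceRGB())

    // Rough luminance from the averaged RGB components
    let r = Float(pixel[0]), g = Float(pixel[1]), b = Float(pixel[2])
    return (0.299 * r + 0.587 * g + 0.114 * b) / 255.0
}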

🤖 6. Core ML Integration

Combine Vision with Core ML to run custom models.

VisionMLIntegration.swift – Core ML integration
import Vision
import CoreML
import UIKit

@Observable
class VisionMLClassifier {
    var classification: String = ""
    var confidence: Float = 0.0

    func classify(image: UIImage, model: MLModel) async {
        guard let cgImage = image.cgImage else { return }

        // Wrap the Core ML model in a Vision request
        guard let visionModel = try? VNCoreMLModel(for: model) else { return }

        let request = VNCoreMLRequest(model: visionModel) { [weak self] request, error in
            guard let results = request.results as? [VNClassificationObservation],
                  let topResult = results.first else { return }

            self?.classification = topResult.identifier
            self?.confidence = topResult.confidence
        }

        // Automatically crop and scale the image to the model's input size
        request.imageCropAndScaleOption = .centerCrop

        let handler = VNImageRequestHandler(cgImage: cgImage)
        try? handler.perform([request])
    }
}
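
To call VisionMLClassifier you need an MLModel instance. Assuming an image-classification model such as MobileNetV2.mlmodel has been added to the project (Xcode generates a Swift class for it), usage looks roughly like this; the model name here is an assumption, not part of Vision:

ClassifierUsage.swift – Running the classifier (illustrative example)
import CoreML
import UIKit

// MobileNetV2 stands in for whichever class Xcode generated from your .mlmodel file
func classifyPhoto(_ image: UIImage) async {
    guard let model = try? MobileNetV2(configuration: MLModelConfiguration()).model else { return }

    let classifier = VisionMLClassifier()
    await classifier.classify(image: image, model: model)
    print("\(classifier.classification) (confidence: \(classifier.confidence))")
}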

📱 SwiftUI Integration Example

VisionDemoView.swift – Comprehensive demo
import SwiftUI

struct VisionDemoView: View {
    @State private var selectedImage: UIImage?
    @State private var faceDetector = FaceDetector()
    @State private var textRecognizer = TextRecognizer()
    @State private var barcodeScanner = BarcodeScanner()
    @State private var selectedMode: Mode = .face

    enum Mode: String, CaseIterable {
        case face = "Face Detection"
        case text = "Text Recognition"
        case barcode = "Barcode Scan"
    }

    var body: some View {
        NavigationStack {
            VStack(spacing: 20) {
                // Mode selector
                Picker("Mode", selection: $selectedMode) {
                    ForEach(Mode.allCases, id: \.self) { mode in
                        Text(mode.rawValue).tag(mode)
                    }
                }
                .pickerStyle(.segmented)
                .padding()

                // Display the selected image
                if let image = selectedImage {
                    Image(uiImage: image)
                        .resizable()
                        .scaledToFit()
                        .frame(maxHeight: 300)
                }

                // Show the analysis result
                resultView

                Spacer()

                Button("Select Photo and Analyze") {
                    // Photo selection logic goes here
                }
                .buttonStyle(.borderedProminent)
            }
            .navigationTitle("Vision Demo")
        }
    }

    @ViewBuilder
    var resultView: some View {
        switch selectedMode {
        case .face:
            Text("Detected faces: \(faceDetector.detectedFaces.count)")
        case .text:
            ScrollView {
                Text(textRecognizer.recognizedText)
                    .padding()
            }
        case .barcode:
            VStack {
                ForEach(barcodeScanner.detectedCodes, id: \.uuid) { code in
                    Text(code.payloadStringValue ?? "Unknown")
                }
            }
        }
    }
}

⚡️ Performance tip: VNSequenceRequestHandler improves performance when processing consecutive frames. For real-time camera processing, use the .fast recognition level.
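
A minimal sketch of that advice applied to live frames: one reused VNSequenceRequestHandler plus the .fast recognition level (the class name is illustrative):

LiveTextReader.swift – Real-time text recognition (illustrative example)
import Vision
import CoreVideo

// Text recognition tuned for consecutive camera frames
final class LiveTextReader {
    // Reusing one sequence handler lets Vision share work across frames
    private let sequenceHandler = VNSequenceRequestHandler()

    func readText(from pixelBuffer: CVPixelBuffer) -> [String] {
        let request = VNRecognizeTextRequest()
        request.recognitionLevel = .fast          // prioritize throughput over accuracy
        request.usesLanguageCorrection = false    // skip correction for extra speed

        try? sequenceHandler.perform([request], on: pixelBuffer)

        return request.results?.compactMap { $0.topCandidates(1).first?.string } ?? []
    }
}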