๐ŸŒ KO

๐Ÿ‘๏ธโ€๐Ÿ—จ๏ธ Vision ํ”„๋ ˆ์ž„์›Œํฌ

โญ Difficulty: โญโญโญ โฑ๏ธ Est. Time: 2-3h ๐Ÿ“‚ App Services

์ปดํ“จํ„ฐ ๋น„์ „์œผ๋กœ ์ด๋ฏธ์ง€์™€ ๋น„๋””์˜ค ๋ถ„์„ํ•˜๊ธฐ

iOS 11.0+ · visionOS Supported

โœจ What is Vision?

Vision is Apple's computer vision framework, providing powerful image/video analysis features including face detection, text recognition, barcode scanning, and object tracking. It integrates with Core ML to run custom machine learning models.

๐Ÿ’ก Key Features: Face/Body Detection ยท Text Recognition (OCR) ยท Barcode Scanning ยท Object Tracking ยท Contour Detection ยท Image Alignment ยท Core ML Integration

๐ŸŽฏ 1. ์–ผ๊ตด ์ธ์‹ (Face Detection)

Find faces in images and detect landmarks (eyes, nose, mouth, etc.).

FaceDetector.swift โ€” ์–ผ๊ตด ์ธ์‹
import Vision
import UIKit

@Observable
@Observable
class FaceDetector {
    /// Faces found by the most recent `detectFaces(in:)` call.
    var detectedFaces: [VNFaceObservation] = []
    /// True while a detection request is running.
    var isProcessing = false
    /// Human-readable description of the last failure, if any.
    var errorMessage: String?

    /// Detects face bounding boxes in `image` and publishes them in `detectedFaces`.
    ///
    /// `VNImageRequestHandler.perform(_:)` is synchronous, so the request's
    /// `results` can be read directly after it returns — no completion handler
    /// is needed.
    func detectFaces(in image: UIImage) async {
        isProcessing = true
        errorMessage = nil
        defer { isProcessing = false }  // reset on every exit path

        guard let cgImage = image.cgImage else {
            errorMessage = "์ด๋ฏธ์ง€ ๋ณ€ํ™˜ ์‹คํŒจ"
            return
        }

        // Face detection request
        let request = VNDetectFaceRectanglesRequest()
        let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
        do {
            try handler.perform([request])
            detectedFaces = request.results ?? []
        } catch {
            // Clear instead of keeping stale results from a previous run.
            detectedFaces = []
            errorMessage = error.localizedDescription
        }
    }

    /// Detects face landmarks (eyes, nose, mouth, etc.) in `image`.
    /// - Returns: Observations with populated `landmarks`, or `nil` on failure.
    func detectFaceLandmarks(in image: UIImage) async -> [VNFaceObservation]? {
        guard let cgImage = image.cgImage else { return nil }

        let request = VNDetectFaceLandmarksRequest()
        let handler = VNImageRequestHandler(cgImage: cgImage)

        do {
            try handler.perform([request])
        } catch {
            // Previously `try?` swallowed the error; surface it instead.
            errorMessage = error.localizedDescription
            return nil
        }
        return request.results
    }
}
FaceDetectionView.swift โ€” SwiftUI Integration
import SwiftUI

struct FaceDetectionView: View {
    @State private var detector = FaceDetector()
    @State private var selectedImage: UIImage?
    @State private var showImagePicker = false

    var body: some View {
        VStack(spacing: 20) {
            if let image = selectedImage {
                Image(uiImage: image)
                    .resizable()
                    .scaledToFit()
                    .frame(height: 300)
                    .overlay { faceBoxes }

                Text("๊ฒ€์ถœ๋œ ์–ผ๊ตด: \(detector.detectedFaces.count)๊ฐœ")
                    .font(.headline)
            }

            Button("์‚ฌ์ง„ ์„ ํƒ") { showImagePicker = true }
                .buttonStyle(.borderedProminent)

            if detector.isProcessing {
                ProgressView("์–ผ๊ตด ๊ฒ€์ถœ ์ค‘...")
            }

            if let error = detector.errorMessage {
                Text(error)
                    .foregroundStyle(.red)
            }
        }
        .padding()
        .sheet(isPresented: $showImagePicker) {
            ImagePicker(image: $selectedImage)
        }
        .onChange(of: selectedImage) { _, newImage in
            guard let image = newImage else { return }
            Task { await detector.detectFaces(in: image) }
        }
    }

    /// Draws a green rectangle over each detected face.
    ///
    /// Vision bounding boxes are normalized with a bottom-left origin, so the
    /// y-axis is flipped via `(1 - midY)` when mapping into SwiftUI space.
    /// NOTE(review): boxes are scaled to the overlay's frame, which matches the
    /// displayed image only when aspect ratios align — confirm for letterboxed images.
    private var faceBoxes: some View {
        GeometryReader { proxy in
            ForEach(detector.detectedFaces.indices, id: \.self) { index in
                let box = detector.detectedFaces[index].boundingBox

                Rectangle()
                    .stroke(Color.green, lineWidth: 3)
                    .frame(
                        width: box.width * proxy.size.width,
                        height: box.height * proxy.size.height
                    )
                    .position(
                        x: box.midX * proxy.size.width,
                        y: (1 - box.midY) * proxy.size.height
                    )
            }
        }
    }
}

๐Ÿ“ 2. ํ…์ŠคํŠธ ์ธ์‹ (OCR)

Automatically detects and recognizes text in images. Multiple languages are supported.

TextRecognizer.swift โ€” OCR Implementation
import Vision
import UIKit

@Observable
@Observable
class TextRecognizer {
    /// All recognized lines of the last pass, joined with newlines.
    var recognizedText: String = ""
    /// Raw per-line observations from the last recognition pass.
    var textObservations: [VNRecognizedTextObservation] = []

    /// Builds a consistently configured request so both entry points
    /// (whole-image and region-of-interest) recognize text the same way.
    /// The original region-based method skipped this configuration entirely.
    private func makeRequest() -> VNRecognizeTextRequest {
        let request = VNRecognizeTextRequest()
        // Recognition level (accurate = precision first, fast = speed first)
        request.recognitionLevel = .accurate
        // Multi-language support (Korean, English, etc.)
        request.recognitionLanguages = ["ko-KR", "en-US"]
        // Custom vocabulary to boost recognition of domain terms
        request.customWords = ["SwiftUI", "Vision", "iOS"]
        return request
    }

    /// Recognizes all text in `image`, publishing results via
    /// `recognizedText` / `textObservations`.
    func recognizeText(in image: UIImage) async {
        guard let cgImage = image.cgImage else { return }

        let request = makeRequest()
        let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
        do {
            try handler.perform([request])
        } catch {
            // Previously `try?` left stale text from an earlier image on failure.
            recognizedText = ""
            textObservations = []
            return
        }

        let observations = request.results ?? []
        textObservations = observations

        // Join the best candidate of every observed line.
        recognizedText = observations
            .compactMap { $0.topCandidates(1).first?.string }
            .joined(separator: "\n")
    }

    /// Recognizes text only inside `region`.
    /// - Parameter region: Normalized coordinates (0...1), origin bottom-left,
    ///   per Vision's `regionOfInterest` convention.
    /// - Returns: The best candidate of the first observation, or `nil`.
    func recognizeText(in image: UIImage, region: CGRect) async -> String? {
        guard let cgImage = image.cgImage else { return nil }

        let request = makeRequest()
        request.regionOfInterest = region

        let handler = VNImageRequestHandler(cgImage: cgImage)
        do {
            try handler.perform([request])
        } catch {
            return nil
        }

        return request.results?.first?.topCandidates(1).first?.string
    }
}

๐Ÿ“ท 3. ๋ฐ”์ฝ”๋“œ/QR ์ฝ”๋“œ ์Šค์บ”

Recognize barcodes and QR codes from images or real-time camera.

BarcodeScanner.swift โ€” Barcode Scanning
import Vision
import AVFoundation

@Observable
@Observable
class BarcodeScanner {
    /// Barcodes found by the most recent scan (still image or camera frame).
    var detectedCodes: [VNBarcodeObservation] = []

    /// Scans `image` for barcodes and returns the decoded payload strings.
    func scanBarcodes(in image: UIImage) async -> [String] {
        guard let cgImage = image.cgImage else { return [] }

        let request = VNDetectBarcodesRequest()
        // Scan only specific symbologies (QR, EAN-13, Code 128)
        request.symbologies = [.qr, .ean13, .code128]

        let handler = VNImageRequestHandler(cgImage: cgImage)
        do {
            try handler.perform([request])
        } catch {
            // BUG FIX: the original returned the PREVIOUS scan's payloads when
            // perform failed, because it read the stale stored property.
            detectedCodes = []
            return []
        }

        detectedCodes = request.results ?? []
        return detectedCodes.compactMap { $0.payloadStringValue }
    }

    /// Processes one camera frame; intended for real-time scanning.
    func processCameraFrame(_ sampleBuffer: CMSampleBuffer) {
        guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else { return }

        let request = VNDetectBarcodesRequest()
        // Consistency fix: apply the same symbology filter as scanBarcodes(in:).
        request.symbologies = [.qr, .ean13, .code128]

        let handler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer, options: [:])
        do {
            try handler.perform([request])
            detectedCodes = request.results ?? []
        } catch {
            // Best-effort per frame: keep the last good result on failure.
        }
    }
}

๐ŸŽฏ 4. ๊ฐ์ฒด ์ถ”์  (Object Tracking)

๋น„๋””์˜ค์—์„œ ํŠน์ • ๊ฐ์ฒด๋ฅผ ํ”„๋ ˆ์ž„ ๊ฐ„ ์ถ”์ .

ObjectTracker.swift โ€” ๊ฐ์ฒด ์ถ”์ 
import Vision

@Observable
@Observable
class ObjectTracker {
    private var trackingRequest: VNTrackObjectRequest?
    /// Most recent successfully tracked observation.
    var trackedObject: VNDetectedObjectObservation?

    // BUG FIX: a VNSequenceRequestHandler maintains state ACROSS frames and
    // must be reused for the whole tracking sequence. The original created a
    // fresh handler inside processFrame, which defeats inter-frame tracking.
    private var sequenceHandler = VNSequenceRequestHandler()

    /// Starts tracking from the object's initial bounding box.
    func startTracking(initialObservation: VNDetectedObjectObservation) {
        // Fresh handler per tracking session so old sequence state never leaks in.
        sequenceHandler = VNSequenceRequestHandler()
        trackingRequest = VNTrackObjectRequest(detectedObjectObservation: initialObservation)
        trackingRequest?.trackingLevel = .accurate
    }

    /// Processes the next video frame.
    /// - Returns: The tracked object's normalized bounding box, or `nil` when
    ///   tracking is inactive, the frame failed, or confidence is too low.
    func processFrame(_ pixelBuffer: CVPixelBuffer) -> CGRect? {
        guard let request = trackingRequest else { return nil }

        try? sequenceHandler.perform([request], on: pixelBuffer)

        guard let observation = request.results?.first as? VNDetectedObjectObservation else {
            return nil
        }

        // Confidence gate against drift
        if observation.confidence > 0.5 {
            // Feed the latest position back so the next frame tracks from it.
            request.inputObservation = observation
            trackedObject = observation
            return observation.boundingBox
        }

        return nil
    }

    func stopTracking() {
        // Let Vision release per-sequence tracker resources.
        trackingRequest?.isLastFrame = true
        trackingRequest = nil
        trackedObject = nil
    }
}

๐Ÿ” 5. ์ด๋ฏธ์ง€ ๋ถ„์„ (Image Analysis)

Analyze image properties (brightness, focus, horizon, etc.).

ImageAnalyzer.swift โ€” ์ด๋ฏธ์ง€ ๋ถ„์„
import Vision

/// Result bundle produced by `ImageAnalyzer.analyzeImage(_:)`.
struct ImageAnalysisResult {
    // NOTE(review): brightness, contrast and isFocused are currently filled
    // with hard-coded placeholder values by the analyzer — confirm before
    // relying on them.
    let brightness: Float
    let contrast: Float
    // Horizon tilt from VNHorizonObservation.angle, nil if no horizon found
    // (presumably radians — TODO confirm against Vision docs).
    let horizonAngle: CGFloat?
    let isFocused: Bool
}

@Observable
class ImageAnalyzer {
    func analyzeImage(_ image: UIImage) async -> ImageAnalysisResult? {
        guard let cgImage = image.cgImage else { return nil }

        // ์ด๋ฏธ์ง€ ์†์„ฑ ๋ถ„์„
        let featureRequest = VNDetectHorizonRequest()
        let qualityRequest = VNClassifyImageRequest()

        let handler = VNImageRequestHandler(cgImage: cgImage)
        try? handler.perform([featureRequest, qualityRequest])

        let horizonAngle = (featureRequest.results?.first)?.angle

        return ImageAnalysisResult(
            brightness: 0.7,
            contrast: 0.8,
            horizonAngle: horizonAngle,
            isFocused: true
        )
    }

    // ์œค๊ณฝ์„  ๊ฒ€์ถœ
    func detectContours(in image: UIImage) async -> [VNContoursObservation]? {
        guard let cgImage = image.cgImage else { return nil }

        let request = VNDetectContoursRequest()
        request.contrastAdjustment = 2.0
        request.detectsDarkOnLight = true

        let handler = VNImageRequestHandler(cgImage: cgImage)
        try? handler.perform([request])

        return request.results
    }
}

๐Ÿค– 6. Core ML ํ†ตํ•ฉ

Combine Vision and Core ML to run custom models.

VisionMLIntegration.swift โ€” Core ML Integration
import Vision
import CoreML

@Observable
@Observable
class VisionMLClassifier {
    /// Identifier of the top classification result.
    var classification: String = ""
    /// Confidence (0...1) of the top classification result.
    var confidence: Float = 0.0

    /// Runs the given Core ML `model` on `image` via Vision and stores the
    /// top label and its confidence.
    func classify(image: UIImage, model: MLModel) async {
        guard let cgImage = image.cgImage else { return }

        // Wrap the Core ML model as a Vision-compatible model.
        guard let visionModel = try? VNCoreMLModel(for: model) else { return }

        let request = VNCoreMLRequest(model: visionModel) { [weak self] request, _ in
            guard let self,
                  let results = request.results as? [VNClassificationObservation],
                  let best = results.first else { return }

            self.classification = best.identifier
            self.confidence = best.confidence
        }

        // Auto-fit the image to the model's expected input size.
        request.imageCropAndScaleOption = .centerCrop

        let handler = VNImageRequestHandler(cgImage: cgImage)
        try? handler.perform([request])
    }
}

๐Ÿ“ฑ SwiftUI Integration Example

VisionDemoView.swift โ€” Complete Demo
import SwiftUI

struct VisionDemoView: View {
    @State private var selectedImage: UIImage?
    @State private var faceDetector = FaceDetector()
    @State private var textRecognizer = TextRecognizer()
    @State private var barcodeScanner = BarcodeScanner()
    @State private var selectedMode: Mode = .face

    /// Analysis modes offered by the segmented picker.
    enum Mode: String, CaseIterable {
        case face = "์–ผ๊ตด ์ธ์‹"
        case text = "ํ…์ŠคํŠธ ์ธ์‹"
        case barcode = "๋ฐ”์ฝ”๋“œ ์Šค์บ”"
    }

    var body: some View {
        NavigationStack {
            VStack(spacing: 20) {
                modePicker

                // Selected image preview
                if let image = selectedImage {
                    Image(uiImage: image)
                        .resizable()
                        .scaledToFit()
                        .frame(maxHeight: 300)
                }

                // Mode-specific results
                resultView

                Spacer()

                Button("์‚ฌ์ง„ ์„ ํƒํ•˜๊ณ  ๋ถ„์„") {
                    // Photo-selection logic goes here.
                }
                .buttonStyle(.borderedProminent)
            }
            .navigationTitle("Vision ๋ฐ๋ชจ")
        }
    }

    /// Segmented control for switching between analysis modes.
    private var modePicker: some View {
        Picker("๋ชจ๋“œ", selection: $selectedMode) {
            ForEach(Mode.allCases, id: \.self) { mode in
                Text(mode.rawValue).tag(mode)
            }
        }
        .pickerStyle(.segmented)
        .padding()
    }

    @ViewBuilder
    var resultView: some View {
        switch selectedMode {
        case .face:
            Text("๊ฒ€์ถœ๋œ ์–ผ๊ตด: \(faceDetector.detectedFaces.count)๊ฐœ")
        case .text:
            ScrollView {
                Text(textRecognizer.recognizedText)
                    .padding()
            }
        case .barcode:
            VStack {
                // Each VNObservation carries a unique `uuid` suitable as an ID.
                ForEach(barcodeScanner.detectedCodes, id: \.uuid) { code in
                    Text(code.payloadStringValue ?? "์•Œ ์ˆ˜ ์—†์Œ")
                }
            }
        }
    }
}

๐Ÿ’ก HIG Guidelines

๐ŸŽฏ Practical Usage

๐Ÿ“š Learn More

โšก๏ธ Performance Tips: Using VNSequenceRequestHandler improves performance for continuous frame processing. Use .fast accuracy for real-time camera processing.

๐Ÿ“Ž Apple Official Resources

๐Ÿ“˜ Documentation ๐Ÿ’ป Sample Code ๐ŸŽฌ WWDC Sessions