👁️‍🗨️ Vision Framework
⭐ Difficulty: ⭐⭐⭐
⏱️ Est. Time: 2-3h
App Services
Analyze images and video with computer vision
iOS 11+ · visionOS Supported
✨ What is Vision?
Vision is Apple's computer vision framework, providing powerful image/video analysis features including face detection, text recognition, barcode scanning, and object tracking. It integrates with Core ML to run custom machine learning models.
💡 Key Features: Face/Body Detection · Text Recognition (OCR) · Barcode Scanning · Object Tracking · Contour Detection · Image Alignment · Core ML Integration
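Every feature below follows the same two-step pattern: build a VNRequest describing what to analyze, then run it through a VNImageRequestHandler bound to the image. A minimal sketch of that pattern, assuming a hypothetical bundled asset named "sample":

import Vision
import UIKit

func detectFacesOnce() {
    // "sample" is a hypothetical asset; any CGImage source works.
    guard let cgImage = UIImage(named: "sample")?.cgImage else { return }

    // 1. The request describes WHAT to analyze.
    let request = VNDetectFaceRectanglesRequest()

    // 2. The handler binds the request to an image and performs it.
    let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
    do {
        try handler.perform([request])
        // 3. Results come back as typed observations on the request.
        print("Faces found: \(request.results?.count ?? 0)")
    } catch {
        print("Vision request failed: \(error)")
    }
}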
🎯 1. Face Detection
Find faces in images and detect landmarks (eyes, nose, mouth, etc.).
FaceDetector.swift – Face Detection
import Vision
import UIKit
import Observation

@Observable
class FaceDetector {
    var detectedFaces: [VNFaceObservation] = []
    var isProcessing = false
    var errorMessage: String?

    func detectFaces(in image: UIImage) async {
        isProcessing = true
        errorMessage = nil

        guard let cgImage = image.cgImage else {
            errorMessage = "Failed to convert image"
            isProcessing = false
            return
        }

        // Face detection request
        let request = VNDetectFaceRectanglesRequest { [weak self] request, error in
            guard let observations = request.results as? [VNFaceObservation] else {
                self?.errorMessage = error?.localizedDescription ?? "Face detection failed"
                return
            }
            self?.detectedFaces = observations
        }

        // Perform the request
        let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
        do {
            try handler.perform([request])
        } catch {
            errorMessage = error.localizedDescription
        }

        isProcessing = false
    }

    // Detect face landmarks (eyes, nose, mouth, etc.)
    func detectFaceLandmarks(in image: UIImage) async -> [VNFaceObservation]? {
        guard let cgImage = image.cgImage else { return nil }

        let request = VNDetectFaceLandmarksRequest()
        let handler = VNImageRequestHandler(cgImage: cgImage)

        try? handler.perform([request])
        return request.results
    }
}
FaceDetectionView.swift – SwiftUI Integration
import SwiftUI

struct FaceDetectionView: View {
    @State private var detector = FaceDetector()
    @State private var selectedImage: UIImage?
    @State private var showImagePicker = false

    var body: some View {
        VStack(spacing: 20) {
            if let image = selectedImage {
                Image(uiImage: image)
                    .resizable()
                    .scaledToFit()
                    .frame(height: 300)
                    .overlay {
                        // Draw a rectangle over each detected face
                        GeometryReader { geo in
                            ForEach(detector.detectedFaces.indices, id: \.self) { index in
                                let face = detector.detectedFaces[index]
                                // Vision bounding boxes are normalized with a
                                // bottom-left origin, so the y-axis is flipped.
                                let boundingBox = face.boundingBox

                                Rectangle()
                                    .stroke(Color.green, lineWidth: 3)
                                    .frame(
                                        width: boundingBox.width * geo.size.width,
                                        height: boundingBox.height * geo.size.height
                                    )
                                    .position(
                                        x: boundingBox.midX * geo.size.width,
                                        y: (1 - boundingBox.midY) * geo.size.height
                                    )
                            }
                        }
                    }

                Text("Detected faces: \(detector.detectedFaces.count)")
                    .font(.headline)
            }

            Button("Select Photo") {
                showImagePicker = true
            }
            .buttonStyle(.borderedProminent)

            if detector.isProcessing {
                ProgressView("Detecting faces...")
            }

            if let error = detector.errorMessage {
                Text(error)
                    .foregroundStyle(.red)
            }
        }
        .padding()
        .sheet(isPresented: $showImagePicker) {
            ImagePicker(image: $selectedImage)
        }
        .onChange(of: selectedImage) { _, newImage in
            if let image = newImage {
                Task { await detector.detectFaces(in: image) }
            }
        }
    }
}
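The view above assumes an ImagePicker helper, which is not part of Vision or SwiftUI. A minimal sketch of one, wrapping PHPickerViewController for single-image selection:

import SwiftUI
import PhotosUI

struct ImagePicker: UIViewControllerRepresentable {
    @Binding var image: UIImage?

    func makeUIViewController(context: Context) -> PHPickerViewController {
        var config = PHPickerConfiguration()
        config.filter = .images
        config.selectionLimit = 1
        let picker = PHPickerViewController(configuration: config)
        picker.delegate = context.coordinator
        return picker
    }

    func updateUIViewController(_ uiViewController: PHPickerViewController, context: Context) {}

    func makeCoordinator() -> Coordinator { Coordinator(self) }

    final class Coordinator: NSObject, PHPickerViewControllerDelegate {
        let parent: ImagePicker
        init(_ parent: ImagePicker) { self.parent = parent }

        func picker(_ picker: PHPickerViewController, didFinishPicking results: [PHPickerResult]) {
            picker.dismiss(animated: true)
            guard let provider = results.first?.itemProvider,
                  provider.canLoadObject(ofClass: UIImage.self) else { return }
            provider.loadObject(ofClass: UIImage.self) { object, _ in
                // Hop to the main thread before mutating SwiftUI state.
                DispatchQueue.main.async {
                    self.parent.image = object as? UIImage
                }
            }
        }
    }
}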
📝 2. Text Recognition (OCR)
Automatically detect and recognize text in images, with multi-language support.
TextRecognizer.swift – OCR Implementation
import Vision
import UIKit
import Observation

@Observable
class TextRecognizer {
    var recognizedText: String = ""
    var textObservations: [VNRecognizedTextObservation] = []

    func recognizeText(in image: UIImage) async {
        guard let cgImage = image.cgImage else { return }

        let request = VNRecognizeTextRequest { [weak self] request, error in
            guard let observations = request.results as? [VNRecognizedTextObservation] else { return }
            self?.textObservations = observations

            // Join all recognized text lines
            let recognizedStrings = observations.compactMap { observation in
                observation.topCandidates(1).first?.string
            }
            self?.recognizedText = recognizedStrings.joined(separator: "\n")
        }

        // Recognition level (.accurate favors accuracy, .fast favors speed)
        request.recognitionLevel = .accurate

        // Multi-language support (Korean, English, etc.)
        request.recognitionLanguages = ["ko-KR", "en-US"]

        // Custom words (improves recognition of domain-specific terms)
        request.customWords = ["SwiftUI", "Vision", "iOS"]

        let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
        try? handler.perform([request])
    }

    // Recognize text only within a specific region
    func recognizeText(in image: UIImage, region: CGRect) async -> String? {
        guard let cgImage = image.cgImage else { return nil }

        let request = VNRecognizeTextRequest()
        // regionOfInterest is normalized (0-1) with a bottom-left origin
        request.regionOfInterest = region

        let handler = VNImageRequestHandler(cgImage: cgImage)
        try? handler.perform([request])

        return request.results?.first?.topCandidates(1).first?.string
    }
}
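Korean ("ko-KR") support was added in a later text-recognition revision, so it is worth checking availability at runtime before requesting it. A sketch using supportedRecognitionLanguages() (available from iOS 15):

import Vision

// Returns the OCR languages the current OS revision can recognize.
func availableOCRLanguages() -> [String] {
    let request = VNRecognizeTextRequest()
    request.recognitionLevel = .accurate
    return (try? request.supportedRecognitionLanguages()) ?? []
}

// Usage: only request Korean when the system lists it.
// if availableOCRLanguages().contains("ko-KR") {
//     request.recognitionLanguages = ["ko-KR", "en-US"]
// }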
📷 3. Barcode/QR Code Scanning
Recognize barcodes and QR codes from still images or a live camera feed.
BarcodeScanner.swift – Barcode Scanning
import Vision
import UIKit
import AVFoundation
import Observation

@Observable
class BarcodeScanner {
    var detectedCodes: [VNBarcodeObservation] = []

    func scanBarcodes(in image: UIImage) async -> [String] {
        guard let cgImage = image.cgImage else { return [] }

        let request = VNDetectBarcodesRequest { [weak self] request, _ in
            guard let results = request.results as? [VNBarcodeObservation] else { return }
            self?.detectedCodes = results
        }

        // Limit scanning to specific symbologies (QR, EAN-13, etc.)
        request.symbologies = [.qr, .ean13, .code128]

        let handler = VNImageRequestHandler(cgImage: cgImage)
        try? handler.perform([request])

        return detectedCodes.compactMap { $0.payloadStringValue }
    }

    // For real-time camera scanning
    func processCameraFrame(_ sampleBuffer: CMSampleBuffer) {
        guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else { return }

        let request = VNDetectBarcodesRequest { [weak self] request, _ in
            guard let results = request.results as? [VNBarcodeObservation] else { return }
            self?.detectedCodes = results
        }

        let handler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer, options: [:])
        try? handler.perform([request])
    }
}
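processCameraFrame(_:) expects live sample buffers from the camera. A sketch of feeding it from AVCaptureSession, assuming camera permission is already granted (the class and queue names here are illustrative):

import AVFoundation

final class CameraFeed: NSObject, AVCaptureVideoDataOutputSampleBufferDelegate {
    let session = AVCaptureSession()
    let scanner = BarcodeScanner()
    private let frameQueue = DispatchQueue(label: "camera.frames")

    func start() throws {
        guard let device = AVCaptureDevice.default(for: .video) else { return }
        let input = try AVCaptureDeviceInput(device: device)
        let output = AVCaptureVideoDataOutput()
        output.setSampleBufferDelegate(self, queue: frameQueue)

        if session.canAddInput(input) { session.addInput(input) }
        if session.canAddOutput(output) { session.addOutput(output) }
        // In production, call startRunning() off the main thread; it blocks.
        session.startRunning()
    }

    // Every captured frame is forwarded to the barcode scanner.
    func captureOutput(_ output: AVCaptureOutput,
                       didOutput sampleBuffer: CMSampleBuffer,
                       from connection: AVCaptureConnection) {
        scanner.processCameraFrame(sampleBuffer)
    }
}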
🎯 4. Object Tracking
Track a specific object across video frames.
ObjectTracker.swift – Object Tracking
import Vision
import Observation

@Observable
class ObjectTracker {
    private var trackingRequest: VNTrackObjectRequest?
    // A single sequence handler must be reused across frames so Vision
    // can carry tracking state from one frame to the next.
    private let sequenceHandler = VNSequenceRequestHandler()
    var trackedObject: VNDetectedObjectObservation?

    // Start tracking from the object's initial position
    func startTracking(initialObservation: VNDetectedObjectObservation) {
        trackingRequest = VNTrackObjectRequest(detectedObjectObservation: initialObservation)
        trackingRequest?.trackingLevel = .accurate
    }

    // Process the next frame
    func processFrame(_ pixelBuffer: CVPixelBuffer) -> CGRect? {
        guard let request = trackingRequest else { return nil }

        try? sequenceHandler.perform([request], on: pixelBuffer)

        guard let observation = request.results?.first as? VNDetectedObjectObservation else {
            return nil
        }

        // Confidence check
        if observation.confidence > 0.5 {
            trackedObject = observation
            // Feed the latest observation back in for the next frame
            request.inputObservation = observation
            return observation.boundingBox
        }
        return nil
    }

    func stopTracking() {
        trackingRequest = nil
        trackedObject = nil
    }
}
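VNTrackObjectRequest needs an initial observation to lock onto, typically produced by a detection request on the first frame. A sketch seeding the tracker with a detected face (VNFaceObservation subclasses VNDetectedObjectObservation, so it qualifies):

import Vision

func beginTrackingFirstFace(in pixelBuffer: CVPixelBuffer, with tracker: ObjectTracker) {
    // Detect faces in the first frame.
    let detect = VNDetectFaceRectanglesRequest()
    let handler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer, options: [:])
    try? handler.perform([detect])

    // Hand the first face to the tracker as its starting observation.
    guard let face = detect.results?.first else { return }
    tracker.startTracking(initialObservation: face)
}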
📊 5. Image Analysis
Analyze image properties (brightness, focus, horizon, etc.).
ImageAnalyzer.swift – Image Analysis
import Vision
import UIKit
import Observation

struct ImageAnalysisResult {
    let brightness: Float
    let contrast: Float
    let horizonAngle: CGFloat?
    let isFocused: Bool
}

@Observable
class ImageAnalyzer {
    func analyzeImage(_ image: UIImage) async -> ImageAnalysisResult? {
        guard let cgImage = image.cgImage else { return nil }

        // Analyze image properties
        let featureRequest = VNDetectHorizonRequest()
        let qualityRequest = VNClassifyImageRequest()

        let handler = VNImageRequestHandler(cgImage: cgImage)
        try? handler.perform([featureRequest, qualityRequest])

        let horizonAngle = featureRequest.results?.first?.angle

        // Note: brightness, contrast, and focus are placeholder values;
        // Vision does not report them directly (see the Core Image sketch below).
        return ImageAnalysisResult(
            brightness: 0.7,
            contrast: 0.8,
            horizonAngle: horizonAngle,
            isFocused: true
        )
    }

    // Contour detection
    func detectContours(in image: UIImage) async -> [VNContoursObservation]? {
        guard let cgImage = image.cgImage else { return nil }

        let request = VNDetectContoursRequest()
        request.contrastAdjustment = 2.0
        request.detectsDarkOnLight = true

        let handler = VNImageRequestHandler(cgImage: cgImage)
        try? handler.perform([request])

        return request.results
    }
}
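Since Vision reports no brightness or contrast directly, the placeholder fields above would need another API. A sketch measuring real average brightness with Core Image's CIAreaAverage filter, which reduces the image to a single pixel:

import CoreImage
import UIKit

func averageBrightness(of image: UIImage) -> Float? {
    guard let input = CIImage(image: image),
          let filter = CIFilter(name: "CIAreaAverage", parameters: [
              kCIInputImageKey: input,
              kCIInputExtentKey: CIVector(cgRect: input.extent)
          ]),
          let output = filter.outputImage else { return nil }

    // Render the 1x1 output pixel into a 4-byte RGBA buffer.
    var pixel = [UInt8](repeating: 0, count: 4)
    CIContext().render(output,
                       toBitmap: &pixel,
                       rowBytes: 4,
                       bounds: CGRect(x: 0, y: 0, width: 1, height: 1),
                       format: .RGBA8,
                       colorSpace: CGColorSpaceCreateDeviceRGB())

    // Standard luma approximation from the averaged RGB values.
    let r = Float(pixel[0]), g = Float(pixel[1]), b = Float(pixel[2])
    return (0.299 * r + 0.587 * g + 0.114 * b) / 255.0
}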
🤖 6. Core ML Integration
Combine Vision and Core ML to run custom models.
VisionMLIntegration.swift – Core ML Integration
import Vision
import CoreML
import UIKit
import Observation

@Observable
class VisionMLClassifier {
    var classification: String = ""
    var confidence: Float = 0.0

    func classify(image: UIImage, model: MLModel) async {
        guard let cgImage = image.cgImage else { return }

        // Wrap the Core ML model in a Vision request
        guard let visionModel = try? VNCoreMLModel(for: model) else { return }

        let request = VNCoreMLRequest(model: visionModel) { [weak self] request, error in
            guard let results = request.results as? [VNClassificationObservation],
                  let topResult = results.first else { return }

            self?.classification = topResult.identifier
            self?.confidence = topResult.confidence
        }

        // Automatically crop and scale the image to the model's input size
        request.imageCropAndScaleOption = .centerCrop

        let handler = VNImageRequestHandler(cgImage: cgImage)
        try? handler.perform([request])
    }
}
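A hedged usage example: MobileNetV2 below stands in for any .mlmodel file added to the project (Xcode generates the Swift class from the model file, so the name depends on your model):

import CoreML
import UIKit

func runClassifier(on photo: UIImage) async {
    let classifier = VisionMLClassifier()
    // Hypothetical generated model class; replace with your own model.
    guard let model = try? MobileNetV2(configuration: MLModelConfiguration()).model else { return }
    await classifier.classify(image: photo, model: model)
    print("\(classifier.classification) (\(classifier.confidence))")
}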
📱 SwiftUI Integration Example
VisionDemoView.swift – Complete Demo
import SwiftUI

struct VisionDemoView: View {
    @State private var selectedImage: UIImage?
    @State private var faceDetector = FaceDetector()
    @State private var textRecognizer = TextRecognizer()
    @State private var barcodeScanner = BarcodeScanner()
    @State private var selectedMode: Mode = .face

    enum Mode: String, CaseIterable {
        case face = "Face Detection"
        case text = "Text Recognition"
        case barcode = "Barcode Scanning"
    }

    var body: some View {
        NavigationStack {
            VStack(spacing: 20) {
                // Mode selection
                Picker("Mode", selection: $selectedMode) {
                    ForEach(Mode.allCases, id: \.self) { mode in
                        Text(mode.rawValue).tag(mode)
                    }
                }
                .pickerStyle(.segmented)
                .padding()

                // Image display
                if let image = selectedImage {
                    Image(uiImage: image)
                        .resizable()
                        .scaledToFit()
                        .frame(maxHeight: 300)
                }

                // Result display
                resultView

                Spacer()

                Button("Select Photo & Analyze") {
                    // Photo selection logic
                }
                .buttonStyle(.borderedProminent)
            }
            .navigationTitle("Vision Demo")
        }
    }

    @ViewBuilder
    var resultView: some View {
        switch selectedMode {
        case .face:
            Text("Detected faces: \(faceDetector.detectedFaces.count)")
        case .text:
            ScrollView {
                Text(textRecognizer.recognizedText)
                    .padding()
            }
        case .barcode:
            VStack {
                ForEach(barcodeScanner.detectedCodes, id: \.uuid) { code in
                    Text(code.payloadStringValue ?? "Unknown")
                }
            }
        }
    }
}
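The photo-selection action above is left empty. One way to fill it in, sketched here with SwiftUI's PhotosPicker (iOS 16+) rather than the custom ImagePicker sheet:

import SwiftUI
import PhotosUI

struct PhotoPickingDemo: View {
    @State private var pickerItem: PhotosPickerItem?
    @State private var selectedImage: UIImage?

    var body: some View {
        PhotosPicker("Select Photo & Analyze", selection: $pickerItem, matching: .images)
            .onChange(of: pickerItem) { _, newItem in
                Task {
                    // Load the picked item as Data, then bridge to UIImage.
                    if let data = try? await newItem?.loadTransferable(type: Data.self) {
                        selectedImage = UIImage(data: data)
                    }
                }
            }
    }
}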
💡 HIG Guidelines
- Permission Request: add NSCameraUsageDescription to Info.plist when using the camera
- Performance: process Vision requests on a background queue (see the sketch after this list)
- Feedback: clearly show the user that processing is in progress
- Accuracy: prevent false detections with confidence threshold settings
- Privacy: keep face recognition data on-device only
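A minimal sketch of the background-queue guideline above (the queue label is arbitrary):

import Vision

let visionQueue = DispatchQueue(label: "vision.processing", qos: .userInitiated)

func detectFacesInBackground(_ cgImage: CGImage, completion: @escaping (Int) -> Void) {
    visionQueue.async {
        // Vision work stays off the main thread.
        let request = VNDetectFaceRectanglesRequest()
        let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
        try? handler.perform([request])
        let count = request.results?.count ?? 0

        // Hop back to the main thread before touching UI state.
        DispatchQueue.main.async { completion(count) }
    }
}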
🎯 Practical Usage
- Document scanner: extract text from receipts, business cards, and documents with OCR
- AR filters: real-time AR effects driven by face landmarks
- QR check-in: automatic scanning of event tickets
- Product recognition: price comparison via barcode scanning
- Video analysis: object tracking and classification in footage
📚 Learn More
- Vision Framework official documentation
- WWDC: Explore 3D body pose and person segmentation
- Recognizing Text in Images
- Tracking Objects in Video
⚡️ Performance Tips: Reusing a single VNSequenceRequestHandler improves performance when processing consecutive frames. Use the .fast recognition level for real-time camera processing.
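A sketch combining both tips for live text recognition (the class name is illustrative):

import Vision

final class LiveTextReader {
    // Reused across frames; recreating it per frame discards sequence state.
    private let sequenceHandler = VNSequenceRequestHandler()

    func readText(in pixelBuffer: CVPixelBuffer) -> [String] {
        let request = VNRecognizeTextRequest()
        request.recognitionLevel = .fast  // trades accuracy for real-time speed
        try? sequenceHandler.perform([request], on: pixelBuffer)
        return request.results?.compactMap { $0.topCandidates(1).first?.string } ?? []
    }
}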