👁️ Vision Framework
Analyzing images and video with computer vision
✨ What is Vision?
Vision is Apple's computer vision framework, providing powerful image and video analysis features such as face detection, text detection, barcode scanning, and object tracking. It also integrates with Core ML, so you can run custom machine learning models through the same request pipeline.
🎯 1. Face Detection
Finds faces in an image and detects facial landmarks (eyes, nose, mouth, and so on).
```swift
import Vision
import UIKit
import Observation

@Observable
class FaceDetector {
    var detectedFaces: [VNFaceObservation] = []
    var isProcessing = false
    var errorMessage: String?

    func detectFaces(in image: UIImage) async {
        isProcessing = true
        errorMessage = nil

        guard let cgImage = image.cgImage else {
            errorMessage = "Failed to convert image"
            isProcessing = false
            return
        }

        // Face detection request
        let request = VNDetectFaceRectanglesRequest { [weak self] request, error in
            guard let observations = request.results as? [VNFaceObservation] else {
                self?.errorMessage = error?.localizedDescription ?? "Face detection failed"
                return
            }
            self?.detectedFaces = observations
        }

        // Perform the request
        let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
        do {
            try handler.perform([request])
        } catch {
            errorMessage = error.localizedDescription
        }
        isProcessing = false
    }

    // Detect face landmarks (eyes, nose, mouth, etc.)
    func detectFaceLandmarks(in image: UIImage) async -> [VNFaceObservation]? {
        guard let cgImage = image.cgImage else { return nil }

        let request = VNDetectFaceLandmarksRequest()
        let handler = VNImageRequestHandler(cgImage: cgImage)
        try? handler.perform([request])
        return request.results
    }
}
```
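Each observation returned by the landmarks request exposes individual facial regions through its `landmarks` property. A minimal sketch of reading the left-eye points from an observation obtained via `detectFaceLandmarks(in:)` above:

```swift
import Vision

// Convert one face's left-eye landmark points into image coordinates.
func leftEyePoints(for face: VNFaceObservation, imageSize: CGSize) -> [CGPoint] {
    guard let leftEye = face.landmarks?.leftEye else { return [] }
    // pointsInImage(imageSize:) maps the normalized landmark points
    // into the pixel space of the original image.
    return leftEye.pointsInImage(imageSize: imageSize)
}
```

The same pattern applies to the other regions (`rightEye`, `nose`, `outerLips`, and so on).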
```swift
import SwiftUI

struct FaceDetectionView: View {
    @State private var detector = FaceDetector()
    @State private var selectedImage: UIImage?
    @State private var showImagePicker = false

    var body: some View {
        VStack(spacing: 20) {
            if let image = selectedImage {
                Image(uiImage: image)
                    .resizable()
                    .scaledToFit()
                    .frame(height: 300)
                    .overlay {
                        // Draw a rectangle over each detected face.
                        // Vision's bounding boxes are normalized with a
                        // bottom-left origin, so the y-axis is flipped.
                        GeometryReader { geo in
                            ForEach(detector.detectedFaces.indices, id: \.self) { index in
                                let boundingBox = detector.detectedFaces[index].boundingBox
                                Rectangle()
                                    .stroke(Color.green, lineWidth: 3)
                                    .frame(
                                        width: boundingBox.width * geo.size.width,
                                        height: boundingBox.height * geo.size.height
                                    )
                                    .position(
                                        x: boundingBox.midX * geo.size.width,
                                        y: (1 - boundingBox.midY) * geo.size.height
                                    )
                            }
                        }
                    }

                Text("Detected faces: \(detector.detectedFaces.count)")
                    .font(.headline)
            }

            Button("Select Photo") {
                showImagePicker = true
            }
            .buttonStyle(.borderedProminent)

            if detector.isProcessing {
                ProgressView("Detecting faces…")
            }

            if let error = detector.errorMessage {
                Text(error)
                    .foregroundStyle(.red)
            }
        }
        .padding()
        .sheet(isPresented: $showImagePicker) {
            // ImagePicker is assumed to be a picker wrapper defined elsewhere.
            ImagePicker(image: $selectedImage)
        }
        .onChange(of: selectedImage) { _, newImage in
            if let image = newImage {
                Task { await detector.detectFaces(in: image) }
            }
        }
    }
}
```
📝 2. Text Recognition (OCR)
Automatically detects and recognizes text in images, with multi-language support.
```swift
import Vision
import UIKit
import Observation

@Observable
class TextRecognizer {
    var recognizedText: String = ""
    var textObservations: [VNRecognizedTextObservation] = []

    func recognizeText(in image: UIImage) async {
        guard let cgImage = image.cgImage else { return }

        let request = VNRecognizeTextRequest { [weak self] request, error in
            guard let observations = request.results as? [VNRecognizedTextObservation] else { return }
            self?.textObservations = observations

            // Join all recognized strings, one line per observation
            let recognizedStrings = observations.compactMap { observation in
                observation.topCandidates(1).first?.string
            }
            self?.recognizedText = recognizedStrings.joined(separator: "\n")
        }

        // Recognition level (.accurate favors accuracy, .fast favors speed)
        request.recognitionLevel = .accurate

        // Multi-language support (Korean, English, etc.)
        request.recognitionLanguages = ["ko-KR", "en-US"]

        // Custom words to improve recognition of domain-specific terms
        request.customWords = ["SwiftUI", "Vision", "iOS"]

        let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
        try? handler.perform([request])
    }

    // Recognize text only within a specific region of the image
    func recognizeText(in image: UIImage, region: CGRect) async -> String? {
        guard let cgImage = image.cgImage else { return nil }

        let request = VNRecognizeTextRequest()
        // regionOfInterest uses normalized coordinates with a bottom-left origin
        request.regionOfInterest = region

        let handler = VNImageRequestHandler(cgImage: cgImage)
        try? handler.perform([request])
        return request.results?.first?.topCandidates(1).first?.string
    }
}
```
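Because `regionOfInterest` is expressed in normalized coordinates (0...1, origin at the bottom-left), a UIKit rect has to be converted before calling the region-based method above. A minimal sketch, assuming you know the pixel size of the image the rect was drawn against:

```swift
import UIKit

// Convert a UIKit rect (top-left origin, points) into Vision's
// normalized coordinate space (bottom-left origin, 0...1).
func normalizedRegion(for rect: CGRect, in imageSize: CGSize) -> CGRect {
    CGRect(
        x: rect.minX / imageSize.width,
        y: 1 - (rect.maxY / imageSize.height),  // flip the y-axis
        width: rect.width / imageSize.width,
        height: rect.height / imageSize.height
    )
}

// Usage:
// let region = normalizedRegion(for: selectionRect, in: imageSize)
// let text = await recognizer.recognizeText(in: image, region: region)
```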
📷 3. Barcode/QR Code Scanning
Recognizes barcodes and QR codes in still images or live camera frames.
```swift
import Vision
import AVFoundation
import UIKit
import Observation

@Observable
class BarcodeScanner {
    var detectedCodes: [VNBarcodeObservation] = []

    func scanBarcodes(in image: UIImage) async -> [String] {
        guard let cgImage = image.cgImage else { return [] }

        let request = VNDetectBarcodesRequest { [weak self] request, _ in
            guard let results = request.results as? [VNBarcodeObservation] else { return }
            self?.detectedCodes = results
        }

        // Restrict scanning to specific symbologies (QR, EAN-13, etc.)
        request.symbologies = [.qr, .ean13, .code128]

        let handler = VNImageRequestHandler(cgImage: cgImage)
        try? handler.perform([request])
        return detectedCodes.compactMap { $0.payloadStringValue }
    }

    // For real-time camera scanning
    func processCameraFrame(_ sampleBuffer: CMSampleBuffer) {
        guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else { return }

        let request = VNDetectBarcodesRequest { [weak self] request, _ in
            guard let results = request.results as? [VNBarcodeObservation] else { return }
            self?.detectedCodes = results
        }

        let handler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer, options: [:])
        try? handler.perform([request])
    }
}
```
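`processCameraFrame(_:)` expects sample buffers from an AVFoundation capture pipeline, which the snippet above doesn't set up. A minimal sketch of wiring a capture session to the scanner; the `CameraController` name and its queue label are illustrative, not part of any API:

```swift
import AVFoundation

// Hypothetical helper that feeds camera frames into BarcodeScanner.
final class CameraController: NSObject, AVCaptureVideoDataOutputSampleBufferDelegate {
    let session = AVCaptureSession()
    let scanner: BarcodeScanner
    private let queue = DispatchQueue(label: "camera.frames")

    init(scanner: BarcodeScanner) {
        self.scanner = scanner
        super.init()

        guard let device = AVCaptureDevice.default(for: .video),
              let input = try? AVCaptureDeviceInput(device: device) else { return }
        if session.canAddInput(input) { session.addInput(input) }

        let output = AVCaptureVideoDataOutput()
        output.setSampleBufferDelegate(self, queue: queue)
        if session.canAddOutput(output) { session.addOutput(output) }
    }

    // Called for every captured frame; forward it to the Vision-based scanner.
    func captureOutput(_ output: AVCaptureOutput,
                       didOutput sampleBuffer: CMSampleBuffer,
                       from connection: AVCaptureConnection) {
        scanner.processCameraFrame(sampleBuffer)
    }
}

// Usage: call controller.session.startRunning() after the user
// grants camera permission.
```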
🎯 4. Object Tracking
Tracks a specific object frame by frame through a video.
```swift
import Vision
import CoreVideo
import Observation

@Observable
class ObjectTracker {
    private var trackingRequest: VNTrackObjectRequest?
    // The same sequence handler must be reused across frames
    // so Vision can maintain tracking state between them.
    private var sequenceHandler = VNSequenceRequestHandler()
    var trackedObject: VNDetectedObjectObservation?

    // Start tracking from the object's initial position
    func startTracking(initialObservation: VNDetectedObjectObservation) {
        sequenceHandler = VNSequenceRequestHandler()
        trackingRequest = VNTrackObjectRequest(detectedObjectObservation: initialObservation)
        trackingRequest?.trackingLevel = .accurate
    }

    // Process the next frame
    func processFrame(_ pixelBuffer: CVPixelBuffer) -> CGRect? {
        guard let request = trackingRequest else { return nil }

        try? sequenceHandler.perform([request], on: pixelBuffer)

        guard let observation = request.results?.first as? VNDetectedObjectObservation else {
            return nil
        }

        // Confidence check to avoid drifting onto the wrong object
        if observation.confidence > 0.5 {
            trackedObject = observation
            // Feed the latest observation back in for the next frame
            request.inputObservation = observation
            return observation.boundingBox
        }
        return nil
    }

    func stopTracking() {
        trackingRequest = nil
        trackedObject = nil
    }
}
```
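`startTracking(initialObservation:)` needs a seed observation, which typically comes from a detector or from a rectangle the user draws. A minimal sketch, assuming a user-selected rect already converted to Vision's normalized, bottom-left-origin coordinates:

```swift
import Vision

// Create a seed observation from a normalized bounding box
// (0...1 on both axes, origin at the bottom-left).
let initialBox = CGRect(x: 0.4, y: 0.4, width: 0.2, height: 0.2) // example values
let seed = VNDetectedObjectObservation(boundingBox: initialBox)

let tracker = ObjectTracker()
tracker.startTracking(initialObservation: seed)
// Then call tracker.processFrame(pixelBuffer) for each incoming frame.
```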
📊 5. Image Analysis
Analyzes image properties such as brightness, focus, and horizon angle.
```swift
import Vision
import UIKit
import Observation

struct ImageAnalysisResult {
    let brightness: Float
    let contrast: Float
    let horizonAngle: CGFloat?
    let isFocused: Bool
}

@Observable
class ImageAnalyzer {
    func analyzeImage(_ image: UIImage) async -> ImageAnalysisResult? {
        guard let cgImage = image.cgImage else { return nil }

        // Detect the horizon angle and run image classification
        let horizonRequest = VNDetectHorizonRequest()
        let classifyRequest = VNClassifyImageRequest()

        let handler = VNImageRequestHandler(cgImage: cgImage)
        try? handler.perform([horizonRequest, classifyRequest])

        let horizonAngle = horizonRequest.results?.first?.angle

        // Note: brightness, contrast, and focus are placeholder values;
        // Vision has no request that reports them directly (see the sketch below).
        return ImageAnalysisResult(
            brightness: 0.7,
            contrast: 0.8,
            horizonAngle: horizonAngle,
            isFocused: true
        )
    }

    // Contour detection
    func detectContours(in image: UIImage) async -> [VNContoursObservation]? {
        guard let cgImage = image.cgImage else { return nil }

        let request = VNDetectContoursRequest()
        request.contrastAdjustment = 2.0
        request.detectsDarkOnLight = true

        let handler = VNImageRequestHandler(cgImage: cgImage)
        try? handler.perform([request])
        return request.results
    }
}
```
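The brightness value above is a hardcoded placeholder, since Vision has no dedicated request for it. One common approach is to average the pixels with Core Image's CIAreaAverage filter; a minimal sketch:

```swift
import CoreImage

// Average a CGImage down to a single pixel and return its luma
// as a rough brightness estimate in 0...1.
func averageBrightness(of cgImage: CGImage) -> Float? {
    let inputImage = CIImage(cgImage: cgImage)
    guard let filter = CIFilter(name: "CIAreaAverage") else { return nil }
    filter.setValue(inputImage, forKey: kCIInputImageKey)
    filter.setValue(CIVector(cgRect: inputImage.extent), forKey: kCIInputExtentKey)
    guard let output = filter.outputImage else { return nil }

    // Render the 1x1 result into a 4-byte RGBA buffer.
    var pixel = [UInt8](repeating: 0, count: 4)
    let context = CIContext()
    context.render(output,
                   toBitmap: &pixel,
                   rowBytes: 4,
                   bounds: CGRect(x: 0, y: 0, width: 1, height: 1),
                   format: .RGBA8,
                   colorSpace: CGColorSpaceCreateDeviceRGB())

    // Rec. 709 luma weights for R, G, B.
    let r = Float(pixel[0]) / 255, g = Float(pixel[1]) / 255, b = Float(pixel[2]) / 255
    return 0.2126 * r + 0.7152 * g + 0.0722 * b
}
```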
🤖 6. Core ML Integration
Combines Vision with Core ML to run custom models.
```swift
import Vision
import CoreML
import UIKit
import Observation

@Observable
class VisionMLClassifier {
    var classification: String = ""
    var confidence: Float = 0.0

    func classify(image: UIImage, model: MLModel) async {
        guard let cgImage = image.cgImage else { return }

        // Wrap the Core ML model in a Vision request
        guard let visionModel = try? VNCoreMLModel(for: model) else { return }

        let request = VNCoreMLRequest(model: visionModel) { [weak self] request, error in
            guard let results = request.results as? [VNClassificationObservation],
                  let topResult = results.first else { return }
            self?.classification = topResult.identifier
            self?.confidence = topResult.confidence
        }

        // Automatically crop and scale the image to the model's input size
        request.imageCropAndScaleOption = .centerCrop

        let handler = VNImageRequestHandler(cgImage: cgImage)
        try? handler.perform([request])
    }
}
```
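To call `classify(image:model:)` you need an `MLModel` instance. A minimal usage sketch, assuming a compiled model named `MyClassifier.mlmodelc` bundled with the app (the name is purely illustrative):

```swift
import CoreML

// Load a compiled Core ML model from the app bundle.
// "MyClassifier" is a hypothetical model name.
func loadClassifier() throws -> MLModel {
    guard let url = Bundle.main.url(forResource: "MyClassifier",
                                    withExtension: "mlmodelc") else {
        throw CocoaError(.fileNoSuchFile)
    }
    return try MLModel(contentsOf: url)
}

// Usage:
// let model = try loadClassifier()
// await classifier.classify(image: photo, model: model)
```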
📱 SwiftUI Integration Example
```swift
import SwiftUI

struct VisionDemoView: View {
    @State private var selectedImage: UIImage?
    @State private var faceDetector = FaceDetector()
    @State private var textRecognizer = TextRecognizer()
    @State private var barcodeScanner = BarcodeScanner()
    @State private var selectedMode: Mode = .face

    enum Mode: String, CaseIterable {
        case face = "Face Detection"
        case text = "Text Recognition"
        case barcode = "Barcode Scanning"
    }

    var body: some View {
        NavigationStack {
            VStack(spacing: 20) {
                // Mode selection
                Picker("Mode", selection: $selectedMode) {
                    ForEach(Mode.allCases, id: \.self) { mode in
                        Text(mode.rawValue).tag(mode)
                    }
                }
                .pickerStyle(.segmented)
                .padding()

                // Image display
                if let image = selectedImage {
                    Image(uiImage: image)
                        .resizable()
                        .scaledToFit()
                        .frame(maxHeight: 300)
                }

                // Result display
                resultView

                Spacer()

                Button("Select Photo and Analyze") {
                    // Photo selection logic (see the PhotosPicker sketch below)
                }
                .buttonStyle(.borderedProminent)
            }
            .navigationTitle("Vision Demo")
        }
    }

    @ViewBuilder
    var resultView: some View {
        switch selectedMode {
        case .face:
            Text("Detected faces: \(faceDetector.detectedFaces.count)")
        case .text:
            ScrollView {
                Text(textRecognizer.recognizedText)
                    .padding()
            }
        case .barcode:
            VStack {
                ForEach(barcodeScanner.detectedCodes, id: \.uuid) { code in
                    Text(code.payloadStringValue ?? "Unknown")
                }
            }
        }
    }
}
```
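The photo selection button above is left as a stub. One way to fill it in is with `PhotosPicker` from PhotosUI (iOS 16+); a minimal sketch, not wired into the full demo view:

```swift
import SwiftUI
import PhotosUI

struct PhotoPickerExample: View {
    @State private var pickerItem: PhotosPickerItem?
    @State private var selectedImage: UIImage?

    var body: some View {
        PhotosPicker("Select Photo", selection: $pickerItem, matching: .images)
            .onChange(of: pickerItem) { _, newItem in
                Task {
                    // Load the picked item as raw data, then decode it.
                    if let data = try? await newItem?.loadTransferable(type: Data.self),
                       let image = UIImage(data: data) {
                        selectedImage = image
                        // Hand the image to a detector here, e.g.:
                        // await faceDetector.detectFaces(in: image)
                    }
                }
            }
    }
}
```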
💡 HIG Guidelines
- Permission requests: add `NSCameraUsageDescription` to Info.plist when using the camera
- Performance: process Vision requests on a background queue (see the sketch after this list)
- Feedback: clearly indicate to the user while processing is in progress
- Accuracy: set confidence thresholds to prevent false detections
- Privacy: keep face recognition data on-device only
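All of the examples above call `perform(_:)` directly, which blocks the calling thread. A minimal sketch of moving that work off the main thread with a detached task, as the performance guideline suggests:

```swift
import Vision
import UIKit

// Run a Vision request off the main thread; callers can await the
// result and update UI state back on the main actor.
func detectFacesInBackground(cgImage: CGImage) async throws -> [VNFaceObservation] {
    try await Task.detached(priority: .userInitiated) {
        // perform(_:) is synchronous, so keep it off the main actor.
        let request = VNDetectFaceRectanglesRequest()
        let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
        try handler.perform([request])
        return request.results ?? []
    }.value
}
```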
🎯 Practical Uses
- Document scanner: extract text from receipts, business cards, and documents with OCR
- AR filters: real-time AR effects driven by face landmarks
- QR check-in: automatic scanning of event tickets
- Product recognition: price comparison via barcode scanning
- Video analysis: object tracking and classification in footage
📚 Learn More
- Vision Framework official documentation
- WWDC: Explore 3D body pose and person segmentation
- Recognizing Text in Images
- Tracking Objects in Video