/*
See LICENSE folder for this sample’s licensing information.

Abstract:
Main view controller: handles camera, preview and cutout UI.
*/

import UIKit
import AVFoundation
import Vision

public class ViewController: UIViewController {
    // MARK: - UI objects
    var previewView: PreviewView!
    var cutoutView: UIView!
    var numberView: UILabel!

    var maskLayer = CAShapeLayer()
    // Device orientation. Updated whenever the orientation changes to a
    // different supported orientation.
    var currentOrientation = UIDeviceOrientation.portrait

    // MARK: - Capture related objects
    private let captureSession = AVCaptureSession()
    let captureSessionQueue = DispatchQueue(label: "com.example.apple-samplecode.CaptureSessionQueue")

    var captureDevice: AVCaptureDevice?

    var videoDataOutput = AVCaptureVideoDataOutput()
    let videoDataOutputQueue = DispatchQueue(label: "com.example.apple-samplecode.VideoDataOutputQueue")

    // MARK: - Region of interest (ROI) and text orientation
    // Region of video data output buffer that recognition should be run on.
    // Gets recalculated once the bounds of the preview layer are known.
    var regionOfInterest = CGRect(x: 0, y: 0, width: 1, height: 1)
    // Orientation of text to search for in the region of interest.
    var textOrientation = CGImagePropertyOrientation.up

    // MARK: - Coordinate transforms
    var bufferAspectRatio: Double!
    // Transform from UI orientation to buffer orientation.
    var uiRotationTransform = CGAffineTransform.identity
    // Transform bottom-left coordinates to top-left.
    var bottomToTopTransform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: -1)
    // Transform coordinates in ROI to global coordinates (still normalized).
    var roiToGlobalTransform = CGAffineTransform.identity

    // Vision -> AVF coordinate transform.
    var visionToAVFTransform = CGAffineTransform.identity
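
    // For reference, a minimal sketch (not part of this class) of how a
    // recognizing subclass might map a Vision result into layer coordinates.
    // `visionRect` stands in for a VNRecognizedTextObservation's boundingBox,
    // which is normalized, ROI-relative and bottom-left-origin:
    //
    //     let avfRect = visionRect.applying(visionToAVFTransform)
    //     let layerRect = previewView.videoPreviewLayer
    //         .layerRectConverted(fromMetadataOutputRect: avfRect)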

    // MARK: - View controller methods

    public override func viewDidLoad() {
        super.viewDidLoad()

        // Create views.
        previewView = PreviewView()
        cutoutView = UIView()
        numberView = UILabel()
        previewView.translatesAutoresizingMaskIntoConstraints = false
        cutoutView.translatesAutoresizingMaskIntoConstraints = false
        numberView.translatesAutoresizingMaskIntoConstraints = false

        self.view.addSubview(previewView)
        self.view.addSubview(cutoutView)
        self.view.addSubview(numberView)
        previewView.centerXAnchor.constraint(equalTo: view.centerXAnchor).isActive = true
        previewView.trailingAnchor.constraint(equalTo: view.trailingAnchor).isActive = true
        previewView.centerYAnchor.constraint(equalTo: view.centerYAnchor).isActive = true
        previewView.bottomAnchor.constraint(equalTo: view.bottomAnchor).isActive = true

        cutoutView.centerXAnchor.constraint(equalTo: view.centerXAnchor).isActive = true
        cutoutView.trailingAnchor.constraint(equalTo: view.trailingAnchor).isActive = true
        cutoutView.centerYAnchor.constraint(equalTo: view.centerYAnchor).isActive = true
        cutoutView.bottomAnchor.constraint(equalTo: view.bottomAnchor).isActive = true

        numberView.centerXAnchor.constraint(equalTo: view.centerXAnchor).isActive = true
        numberView.centerYAnchor.constraint(equalTo: view.centerYAnchor).isActive = true

        let tgr = UITapGestureRecognizer(target: self, action: #selector(handleTap(_:)))
        self.view.addGestureRecognizer(tgr)
        tgr.cancelsTouchesInView = true
        tgr.delaysTouchesEnded = true
        tgr.requiresExclusiveTouchType = true

        // Set up preview view.
        previewView.session = captureSession

        // Set up cutout view.
        cutoutView.backgroundColor = UIColor.gray.withAlphaComponent(0.5)
        maskLayer.backgroundColor = UIColor.clear.cgColor
        maskLayer.fillRule = .evenOdd
        cutoutView.layer.mask = maskLayer
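        // With the .evenOdd fill rule, the inner cutout rect appended in
        // updateCutout() is enclosed by both paths (an even count), so it is
        // excluded from the mask: the cutout stays clear while the rest of
        // cutoutView keeps its translucent gray.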

        // Starting the capture session is a blocking call. Perform setup using
        // a dedicated serial dispatch queue to prevent blocking the main thread.
        captureSessionQueue.async {
            self.setupCamera()

            // Calculate the region of interest now that the camera is set up.
            DispatchQueue.main.async {
                // Figure out the initial ROI.
                self.calculateRegionOfInterest()
            }
        }
    }

    public override func viewWillTransition(to size: CGSize, with coordinator: UIViewControllerTransitionCoordinator) {
        super.viewWillTransition(to: size, with: coordinator)

        // Only change the current orientation if the new one is landscape or
        // portrait. You can't really do anything about flat or unknown.
        let deviceOrientation = UIDevice.current.orientation
        if deviceOrientation.isPortrait || deviceOrientation.isLandscape {
            currentOrientation = deviceOrientation
        }

        // Handle device orientation in the preview layer.
        if let videoPreviewLayerConnection = previewView.videoPreviewLayer.connection {
            if let newVideoOrientation = AVCaptureVideoOrientation(deviceOrientation: deviceOrientation) {
                videoPreviewLayerConnection.videoOrientation = newVideoOrientation
            }
        }

        // Orientation changed: figure out new region of interest (ROI).
        calculateRegionOfInterest()
    }

    public override func viewDidLayoutSubviews() {
        super.viewDidLayoutSubviews()
        updateCutout()
    }

    // MARK: - Setup

    func calculateRegionOfInterest() {
        // In landscape orientation the desired ROI is specified as the ratio of
        // buffer width to height. When the UI is rotated to portrait, keep the
        // vertical size the same (in buffer pixels). Also try to keep the
        // horizontal size the same up to a maximum ratio.
        let desiredHeightRatio = 0.2
        let desiredWidthRatio = 0.8
        let maxPortraitWidth = 0.9

        // Figure out size of ROI.
        let size: CGSize
        if currentOrientation.isPortrait || currentOrientation == .unknown {
            size = CGSize(width: min(desiredWidthRatio * bufferAspectRatio, maxPortraitWidth),
                          height: desiredHeightRatio / bufferAspectRatio)
        } else {
            size = CGSize(width: desiredWidthRatio, height: desiredHeightRatio)
        }
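        // Worked example: both presets used in setupCamera() are 16:9, so
        // bufferAspectRatio is about 1.78. In portrait that gives
        // width = min(0.8 * 1.78, 0.9) = 0.9 and height = 0.2 / 1.78 ≈ 0.11
        // of the buffer's dimensions.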

        // Make it centered.
        regionOfInterest.origin = CGPoint(x: (1 - size.width) / 2, y: (1 - size.height) / 2)
        regionOfInterest.size = size

        // ROI changed, update transform.
        setupOrientationAndTransform()

        // Update the cutout to match the new ROI.
        DispatchQueue.main.async {
            // Wait for the next run cycle before updating the cutout. This
            // ensures that the preview layer already has its new orientation.
            self.updateCutout()
        }
    }

    func updateCutout() {
        // Figure out where the cutout ends up in layer coordinates.
        let roiRectTransform = bottomToTopTransform.concatenating(uiRotationTransform)
        let cutout = previewView.videoPreviewLayer.layerRectConverted(fromMetadataOutputRect: regionOfInterest.applying(roiRectTransform))

        // Create the mask.
        let path = UIBezierPath(rect: cutoutView.frame)
        path.append(UIBezierPath(rect: cutout))
        maskLayer.path = path.cgPath

        // Move the number view down under the cutout.
        var numFrame = cutout
        numFrame.origin.y += numFrame.size.height
        numberView.frame = numFrame
        numberView.numberOfLines = 3
        numberView.font = numberView.font.withSize(9)
    }

    func setupOrientationAndTransform() {
        // Recalculate the affine transform between Vision coordinates and AVF coordinates.

        // Compensate for region of interest.
        let roi = regionOfInterest
        roiToGlobalTransform = CGAffineTransform(translationX: roi.origin.x, y: roi.origin.y).scaledBy(x: roi.width, y: roi.height)

        // Compensate for orientation (buffers always come in the same orientation).
        switch currentOrientation {
        case .landscapeLeft:
            textOrientation = CGImagePropertyOrientation.up
            uiRotationTransform = CGAffineTransform.identity
        case .landscapeRight:
            textOrientation = CGImagePropertyOrientation.down
            uiRotationTransform = CGAffineTransform(translationX: 1, y: 1).rotated(by: CGFloat.pi)
        case .portraitUpsideDown:
            textOrientation = CGImagePropertyOrientation.left
            uiRotationTransform = CGAffineTransform(translationX: 1, y: 0).rotated(by: CGFloat.pi / 2)
        default: // Default everything else to portrait.
            textOrientation = CGImagePropertyOrientation.right
            uiRotationTransform = CGAffineTransform(translationX: 0, y: 1).rotated(by: -CGFloat.pi / 2)
        }
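        // Sanity check for the default (portrait) case: the transform maps a
        // normalized point (x, y) to (y, 1 - x), so the unit square maps onto
        // itself, e.g. (0, 0) -> (0, 1) and (1, 1) -> (1, 0).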

        // Full Vision ROI to AVF transform.
        visionToAVFTransform = roiToGlobalTransform.concatenating(bottomToTopTransform).concatenating(uiRotationTransform)
    }

    func setupCamera() {
        guard let captureDevice = AVCaptureDevice.default(.builtInWideAngleCamera, for: AVMediaType.video, position: .back) else {
            print("Could not create capture device.")
            return
        }
        self.captureDevice = captureDevice

        // NOTE:
        // Requesting 4K buffers allows recognition of smaller text but will
        // consume more power. Use the smallest buffer size necessary to keep
        // down battery usage.
        if captureDevice.supportsSessionPreset(.hd4K3840x2160) {
            captureSession.sessionPreset = AVCaptureSession.Preset.hd4K3840x2160
            bufferAspectRatio = 3840.0 / 2160.0
        } else {
            captureSession.sessionPreset = AVCaptureSession.Preset.hd1920x1080
            bufferAspectRatio = 1920.0 / 1080.0
        }

        guard let deviceInput = try? AVCaptureDeviceInput(device: captureDevice) else {
            print("Could not create device input.")
            return
        }
        if captureSession.canAddInput(deviceInput) {
            captureSession.addInput(deviceInput)
        }

        // Configure video data output.
        videoDataOutput.alwaysDiscardsLateVideoFrames = true
        videoDataOutput.setSampleBufferDelegate(self, queue: videoDataOutputQueue)
        videoDataOutput.videoSettings = [kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_420YpCbCr8BiPlanarFullRange]
        if captureSession.canAddOutput(videoDataOutput) {
            captureSession.addOutput(videoDataOutput)
            // NOTE:
            // There is a trade-off to be made here. Enabling stabilization will
            // give temporally more stable results and should help the recognizer
            // converge. But if it's enabled the VideoDataOutput buffers don't
            // match what's displayed on screen, which makes drawing bounding
            // boxes very hard. Disable it in this app to allow drawing detected
            // bounding boxes on screen.
            videoDataOutput.connection(with: AVMediaType.video)?.preferredVideoStabilizationMode = .off
        } else {
            print("Could not add VDO output.")
            return
        }

        // Set zoom and autofocus to help focus on very small text.
        do {
            try captureDevice.lockForConfiguration()
            captureDevice.videoZoomFactor = 1.5
            captureDevice.autoFocusRangeRestriction = .near
            captureDevice.unlockForConfiguration()
        } catch {
            print("Could not set zoom level due to error: \(error)")
            return
        }
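
        // NOTE:
        // Capture requires an NSCameraUsageDescription entry in Info.plist.
        // A production app would typically also check
        // AVCaptureDevice.authorizationStatus(for: .video), calling
        // requestAccess(for:completionHandler:) if needed, before starting
        // the session; this sample assumes permission has been granted.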

        captureSession.startRunning()
    }

    // MARK: - UI drawing and interaction

    func showString(string: String) {
        // Found a definite number.
        // Stop the camera synchronously to ensure that no further buffers are
        // received. Then update the number view asynchronously.
        captureSessionQueue.sync {
            self.captureSession.stopRunning()
            DispatchQueue.main.async {
                self.numberView.text = string
                self.numberView.isHidden = false
            }
        }
    }
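
    // A recognizing subclass (such as the VisionViewController mentioned
    // below) would call showString(string:) from its results handling once a
    // stable candidate string has been found, e.g.:
    //
    //     showString(string: candidate) // `candidate` is hypothetical here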

    @IBAction func handleTap(_ sender: UITapGestureRecognizer) {
        captureSessionQueue.async {
            if !self.captureSession.isRunning {
                self.captureSession.startRunning()
            }
            DispatchQueue.main.async {
                self.numberView.isHidden = true
            }
        }
    }
}

// MARK: - AVCaptureVideoDataOutputSampleBufferDelegate

extension ViewController: AVCaptureVideoDataOutputSampleBufferDelegate {

    public func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        // This is implemented in VisionViewController.
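        // A rough sketch of what such an override might do, assuming a
        // `request` property holding a preconfigured VNRecognizeTextRequest
        // (the actual configuration lives in VisionViewController):
        //
        //     if let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) {
        //         request.regionOfInterest = regionOfInterest
        //         let handler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer,
        //                                             orientation: textOrientation,
        //                                             options: [:])
        //         try? handler.perform([request])
        //     }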
    }
}

// MARK: - Utility extensions

extension AVCaptureVideoOrientation {
    init?(deviceOrientation: UIDeviceOrientation) {
        switch deviceOrientation {
        case .portrait: self = .portrait
        case .portraitUpsideDown: self = .portraitUpsideDown
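        // The left/right swap below is intentional: UIDeviceOrientation
        // describes how the device is held, while AVCaptureVideoOrientation
        // describes the video content, so the two landscape cases are defined
        // from opposite points of view.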
        case .landscapeLeft: self = .landscapeRight
        case .landscapeRight: self = .landscapeLeft
        default: return nil
        }
    }
}