diff --git a/Examples/BeeTrackingTool/main.swift b/Examples/BeeTrackingTool/main.swift
index ac2712f6..5949e197 100644
--- a/Examples/BeeTrackingTool/main.swift
+++ b/Examples/BeeTrackingTool/main.swift
@@ -9,7 +9,7 @@ import TensorFlow
 struct BeeTrackingTool: ParsableCommand {
   static var configuration = CommandConfiguration(
-    subcommands: [TrainRAE.self, InferTrackRAE.self, InferTrackRawPixels.self])
+    subcommands: [TrainRAE.self, InferTrackRAE.self, InferTrackRawPixels.self, NaiveRae.self])
 }
 
 /// The dimension of the hidden layer in the appearance model.
@@ -164,35 +164,150 @@ struct InferTrackRAE: ParsableCommand {
 
 /// Infers a track on a VOT video, using the raw pixel tracker.
 struct InferTrackRawPixels: ParsableCommand {
-  @Option(help: "Base directory of the VOT dataset")
-  var votBaseDirectory: String
-
-  @Option(help: "Name of the VOT video to use")
-  var videoName: String
-
-  @Option(help: "How many frames to track")
-  var frameCount: Int = 50
-
-  @Flag(help: "Print progress information")
-  var verbose: Bool = false
-
-  func run() {
-    let video = VOTVideo(votBaseDirectory: votBaseDirectory, videoName: videoName)!
-    let videoSlice = video[0..<frameCount]
-
-    var tracker = makeRawPixelTracker(
-      frames: videoSlice.frames, target: videoSlice.frames[0].patch(at: videoSlice.track[0]))
-    let prediction = tracker.infer(knownStart: Tuple1(videoSlice.track[0].center))
-
-    let boxes = tracker.frameVariableIDs.map { frameVariableIDs -> OrientedBoundingBox in
-      let poseID = frameVariableIDs.head
-      return OrientedBoundingBox(
-        center: prediction[poseID], rows: video.track[0].rows, cols: video.track[0].cols)
-    }
-
-    print(boxes.count)
-  }
-}
+  func run() {
+    func rawPixelTracker(_ frames: [Tensor<Float>], _ start: OrientedBoundingBox) -> [OrientedBoundingBox] {
+      var tracker = makeRawPixelTracker(frames: frames, target: frames[0].patch(at: start))
+      tracker.optimizer.precision = 1e0
+      let prediction = tracker.infer(knownStart: Tuple1(start.center))
+      return tracker.frameVariableIDs.map { varIds in
+        let poseId = varIds.head
+        return OrientedBoundingBox(center: prediction[poseId], rows: start.rows, cols: start.cols)
+      }
+    }
+
+    var dataset = OISTBeeVideo()!
+    // Only do inference on the interesting tracks.
+    dataset.tracks = [3, 5, 6, 7].map { dataset.tracks[$0] }
+    let trackerEvaluationDataset = TrackerEvaluationDataset(dataset)
+    let eval = trackerEvaluationDataset.evaluate(
+      rawPixelTracker, sequenceCount: dataset.tracks.count, deltaAnchor: 100, outputFile: "rawpixel.json")
+    print(eval.trackerMetrics.accuracy)
+    print(eval.trackerMetrics.robustness)
+  }
+}
+
+/// Tracking with a Naive Bayes with RAE
+struct NaiveRae: ParsableCommand {
+  @Option(help: "Where to load the RAE weights")
+  var loadWeights: String
+
+  @Option(help: "The dimension of the latent code in the RAE appearance model")
+  var kLatentDimension: Int
+
+  @Option(help: "The dimension of the hidden code in the RAE appearance model")
+  var kHiddenDimension = 100
+
+  @Option
+  var outputFile: String
+
+  @Option
+  var truncate: Int
+
+  @Flag
+  var verbose: Bool = false
+
+  /// Returns predictions for the video, using the RAE appearance model.
+  func naiveRaeTrack(dataset dataset_: OISTBeeVideo) {
+    // … loads the RAE from `loadWeights` and fits the foreground/background
+    // models here, defining the `rae`, `statistics`, `foregroundModel`, and
+    // `backgroundModel` used below …
+    func tracker(_ frames: [Tensor<Float>], _ start: OrientedBoundingBox) -> [OrientedBoundingBox] {
+      var tracker = makeNaiveBayesAETracker(
+        model: rae,
+        statistics: statistics,
+        frames: frames,
+        targetSize: (start.rows, start.cols),
+        foregroundModel: foregroundModel, backgroundModel: backgroundModel
+      )
+      tracker.optimizer.cgls_precision = 1e-5
+      tracker.optimizer.precision = 1e-3
+      tracker.optimizer.max_iteration = 200
+      let prediction = tracker.infer(knownStart: Tuple1(start.center))
+      return tracker.frameVariableIDs.map { varIds in
+        let poseId = varIds.head
+        return OrientedBoundingBox(center: prediction[poseId], rows: start.rows, cols: start.cols)
+      }
+    }
+
+    // Only do inference on the interesting tracks.
+    var evalDataset = OISTBeeVideo(truncate: truncate)!
+    evalDataset.tracks = [3, 5, 6, 7].map { evalDataset.tracks[$0] }
+    let trackerEvaluationDataset = TrackerEvaluationDataset(evalDataset)
+    let eval = trackerEvaluationDataset.evaluate(
+      tracker, sequenceCount: evalDataset.tracks.count, deltaAnchor: 500, outputFile: outputFile)
+    print(eval.trackerMetrics.accuracy)
+    print(eval.trackerMetrics.robustness)
+  }
+
+  func run() {
+    if verbose {
+      print("Loading dataset...")
+    }
+
+    startTimer("DATASET_LOAD")
+    let dataset: OISTBeeVideo = OISTBeeVideo(deferLoadingFrames: true)!
+    stopTimer("DATASET_LOAD")
+
+    if verbose {
+      print("Tracking...")
+    }
+
+    startTimer("RAE_TRACKING")
+    naiveRaeTrack(dataset: dataset)
+    stopTimer("RAE_TRACKING")
+
+    if verbose {
+      printTimers()
+    }
+  }
+}
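Note on the pattern introduced above: `TrackerEvaluationDataset.evaluate` takes any closure of type `([Tensor<Float>], OrientedBoundingBox) -> [OrientedBoundingBox]`, so each subcommand only wraps its factor-graph tracker in that shape, and the accuracy/robustness metrics are then computed identically for all of them. A minimal sketch of a conforming closure (illustrative only, not part of this patch; it assumes the same imports as the main.swift above):

    // A do-nothing baseline with the same shape as rawPixelTracker above:
    // it reports the starting box in every frame.
    func stationaryTracker(
      _ frames: [Tensor<Float>], _ start: OrientedBoundingBox
    ) -> [OrientedBoundingBox] {
      frames.map { _ in start }
    }

Passing stationaryTracker to evaluate in place of rawPixelTracker would give a floor for the metrics against which the learned trackers can be compared.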
diff --git a/Examples/OISTVisualizationTool/main.swift b/Examples/OISTVisualizationTool/main.swift
index c7c180f1..3e85435e 100644
--- a/Examples/OISTVisualizationTool/main.swift
+++ b/Examples/OISTVisualizationTool/main.swift
@@ -24,7 +24,7 @@ import Foundation
 struct OISTVisualizationTool: ParsableCommand {
   static var configuration = CommandConfiguration(
-    subcommands: [VisualizeTrack.self, ViewFrame.self, RawTrack.self, PpcaTrack.self, NaiveRae.self, TrainRAE.self, NaivePca.self])
+    subcommands: [VisualizePrediction.self, VisualizeTrack.self, ViewFrame.self, RawTrack.self, PpcaTrack.self, NaiveRae.self, TrainRAE.self, NaivePca.self])
 }
 
 /// View a frame with bounding boxes
@@ -510,26 +510,23 @@ struct TrainRAE: ParsableCommand {
 
 /// Tracking with a Naive Bayes with RAE
 struct NaivePca: ParsableCommand {
-  @Option(help: "Where to load the RAE weights")
-  var loadWeights: String = "./oist_rae_weight.npy"
-
-  @Option(help: "Which bounding box to track")
-  var boxId: Int = 0
-
-  @Option(help: "Track for how many frames")
-  var trackFrames: Int = 10
-
-  @Option(help: "Track the target from frame x")
-  var trackStartFrame: Int = 250
-
   @Option(help: "The dimension of the latent code in the RAE appearance model")
-  var kLatentDimension = 10
+  var kLatentDimension = 20
 
   @Flag(help: "Print progress information")
   var verbose: Bool = false
 
+  @Flag(help: "Use random projections instead of learned PPCA vectors")
+  var randomProjections: Bool = false
+
+  @Option
+  var outputFile: String
+
+  @Option
+  var truncate: Int
+
   /// Returns predictions for `videoName` using the raw pixel tracker.
-  func naivePpcaTrack(dataset dataset_: OISTBeeVideo, length: Int, startFrom: Int) -> [OrientedBoundingBox] {
+  func naivePpcaTrack(dataset dataset_: OISTBeeVideo) {
     var dataset = dataset_
     dataset.labels = dataset.labels.map {
       $0.filter({ $0.label == .Body })
@@ -549,7 +546,11 @@ struct NaivePca: ParsableCommand {
     var ppca = PPCA(latentSize: kLatentDimension)
     ppca.train(images: batch)
-
+
+    if randomProjections {
+      ppca.W_inv = Tensor(randomNormal: ppca.W_inv!.shape)
+    }
+
     if verbose { print("Fitting Naive Bayes model") }
 
     var (foregroundModel, backgroundModel) = (
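For reference, the randomProjections flag above swaps the learned PPCA projection `W_inv` for an i.i.d. Gaussian matrix of the same shape, which turns the appearance model into a random-projection baseline while leaving the rest of the Naive Bayes pipeline untouched. A rough sketch of the substituted computation, with hypothetical dimensions (in the patch the shapes come from the trained PPCA):

    import TensorFlow

    // Hypothetical sizes: a flattened 40x70 patch projected to 20 dims.
    let featureSize = 40 * 70
    let latentSize = 20

    // Learned case: W_inv comes from ppca.train. Random-projection case:
    // a same-shaped matrix drawn from a standard normal, as in the flag above.
    let wInv = Tensor<Float>(randomNormal: [latentSize, featureSize])

    // Encoding a patch is the same matmul either way (zeros for demonstration).
    let patch = Tensor<Float>(zeros: [featureSize, 1])
    let latent = matmul(wInv, patch)  // shape: [20, 1]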
@@ -573,48 +574,32 @@ struct NaivePca: ParsableCommand {
       print("Background: \(backgroundModel)")
     }
 
-    if verbose { print("Loading video frames...") }
-    startTimer("VIDEO_LOAD")
-    // Load the video and take a slice of it.
-    let videos = (0..<length).map { i -> Tensor<Float> in
-      return withDevice(.cpu) { dataset.loadFrame(dataset.frameIds[startFrom + i])! }
-    }
-    stopTimer("VIDEO_LOAD")
-
-    let startPose = dataset.labels[startFrom][boxId].location.center
-
-    if verbose {
-      print("Creating tracker, startPose = \(startPose)")
-    }
-
-    startTimer("MAKE_GRAPH")
-    var tracker = makeNaiveBayesPCATracker(
-      model: ppca,
-      statistics: statistics,
-      frames: videos,
-      targetSize: (dataset.labels[startFrom][boxId].location.rows, dataset.labels[startFrom][boxId].location.cols),
-      foregroundModel: foregroundModel, backgroundModel: backgroundModel
-    )
-    stopTimer("MAKE_GRAPH")
-
-    if verbose { print("Starting Optimization...") }
-    if verbose { tracker.optimizer.verbosity = .SUMMARY }
-
-    tracker.optimizer.cgls_precision = 1e-7
-    tracker.optimizer.precision = 1e-4
-    tracker.optimizer.max_iteration = 200
-
-    startTimer("GRAPH_INFER")
-    let prediction = tracker.infer(knownStart: Tuple1(startPose))
-    stopTimer("GRAPH_INFER")
-
-    let boxes = tracker.frameVariableIDs.map { frameVariableIDs -> OrientedBoundingBox in
-      let poseID = frameVariableIDs.head
-      return OrientedBoundingBox(
-        center: prediction[poseID], rows: dataset.labels[startFrom][boxId].location.rows, cols: dataset.labels[startFrom][boxId].location.cols)
+    func tracker(_ frames: [Tensor<Float>], _ start: OrientedBoundingBox) -> [OrientedBoundingBox] {
+      var tracker = makeNaiveBayesPCATracker(
+        model: ppca,
+        statistics: statistics,
+        frames: frames,
+        targetSize: (start.rows, start.cols),
+        foregroundModel: foregroundModel, backgroundModel: backgroundModel
+      )
+      tracker.optimizer.cgls_precision = 1e-9
+      tracker.optimizer.precision = 1e-6
+      tracker.optimizer.max_iteration = 200
+      let prediction = tracker.infer(knownStart: Tuple1(start.center))
+      return tracker.frameVariableIDs.map { varIds in
+        let poseId = varIds.head
+        return OrientedBoundingBox(center: prediction[poseId], rows: start.rows, cols: start.cols)
+      }
     }
-    return boxes
 
+    // Only do inference on the interesting tracks.
+    var evalDataset = OISTBeeVideo(truncate: truncate)!
+    evalDataset.tracks = [3, 5, 6, 7].map { evalDataset.tracks[$0] }
+    let trackerEvaluationDataset = TrackerEvaluationDataset(evalDataset)
+    let eval = trackerEvaluationDataset.evaluate(
+      tracker, sequenceCount: evalDataset.tracks.count, deltaAnchor: 500, outputFile: outputFile)
+    print(eval.trackerMetrics.accuracy)
+    print(eval.trackerMetrics.robustness)
   }
 
   func run() {
@@ -631,25 +616,8 @@ struct NaivePca: ParsableCommand {
     }
 
     startTimer("PPCA_TRACKING")
-    var bboxes: [OrientedBoundingBox]
-    bboxes = naivePpcaTrack(dataset: dataset, length: trackFrames, startFrom: trackStartFrame)
+    naivePpcaTrack(dataset: dataset)
     stopTimer("PPCA_TRACKING")
-
-    let frameRawId = dataset.frameIds[trackStartFrame + trackFrames]
-    let image = dataset.loadFrame(frameRawId)!
-
-    if verbose {
-      print("Creating output plot")
-    }
-    startTimer("PLOTTING")
-    plot(image, boxes: bboxes.indices.map {
-      ("\($0)", bboxes[$0])
-    }, margin: 10.0, scale: 0.5).show()
-    stopTimer("PLOTTING")
-
-    if verbose {
-      printTimers()
-    }
   }
 }
 
@@ -666,6 +634,32 @@ struct VisualizeTrack: ParsableCommand {
   }
 }
 
+struct VisualizePrediction: ParsableCommand {
+  @Option
+  var prediction: String
+
+  @Option
+  var subsequenceIndex: Int = 0
+
+  // TODO: I think I should save this in the prediction so that we do not need to specify it!
+  @Option
+  var startFrame: Int
+
+  @Option
+  var output: String
+
+  func run() {
+    let dataset = OISTBeeVideo(deferLoadingFrames: true)!
+    let decoder = JSONDecoder()
+    let data = try! Data(contentsOf: URL(fileURLWithPath: prediction))
+    let sequence = try! decoder.decode(SequenceEvaluationResults.self, from: data)
+
+    let track = OISTBeeTrack(
+      startFrameIndex: startFrame, boxes: sequence.subsequences[subsequenceIndex].prediction)
+    track.render(to: output, video: dataset)
+  }
+}
+
 // It is important to set the global threadpool before doing anything else, so that nothing
 // accidentally uses the default threadpool.
 ComputeThreadPools.global =
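VisualizePrediction relies on the JSON written by `evaluate(..., outputFile:)` decoding as `SequenceEvaluationResults`, whose definition lives in the BeeTracking library. A hypothetical, self-contained mirror of just the fields that run() reads (Box stands in for the library's OrientedBoundingBox, which this sketch assumes is Codable):

    import Foundation

    // Hypothetical stand-ins modeling only what VisualizePrediction touches.
    struct Box: Codable {
      var rows: Int
      var cols: Int
    }

    struct SubsequenceResult: Codable {
      var prediction: [Box]
    }

    struct SequenceResults: Codable {
      var subsequences: [SubsequenceResult]
    }

    // "rawpixel.json" is the output name used by InferTrackRawPixels above.
    let url = URL(fileURLWithPath: "rawpixel.json")
    let results = try! JSONDecoder().decode(SequenceResults.self, from: Data(contentsOf: url))
    print(results.subsequences[0].prediction.count)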
diff --git a/Sources/BeeTracking/OISTBeeVideo+Batches.swift b/Sources/BeeTracking/OISTBeeVideo+Batches.swift
index 6849dc93..87158292 100644
--- a/Sources/BeeTracking/OISTBeeVideo+Batches.swift
+++ b/Sources/BeeTracking/OISTBeeVideo+Batches.swift
@@ -91,12 +91,12 @@ extension OISTBeeVideo {
       Double.random(in: Double(maxSide)..