From 182b23d0957fd4bb7b7d8158a0d64204fdd49305 Mon Sep 17 00:00:00 2001
From: janci007
Date: Mon, 8 Feb 2021 09:07:07 +0100
Subject: [PATCH] Main loop profiling and disabling unnecessary parts

This is not meant to be merged; it is only a suggestion of which parts
of the computation are not always required and can be skipped, where
applicable, to gain more FPS.

The code prints the time spent in each part of the computation as a
theoretical FPS (the FPS that part of the code would reach if it ran
on its own), which makes it easy to compare the cost of the individual
operations and spot the bottleneck.
---
 virtual_webcam.py | 84 +++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 71 insertions(+), 13 deletions(-)

diff --git a/virtual_webcam.py b/virtual_webcam.py
index 4ca2be7..a21723b 100755
--- a/virtual_webcam.py
+++ b/virtual_webcam.py
@@ -18,6 +18,7 @@ from bodypix_functions import to_mask_tensor
 
 import filters
+import time
 
 
 def load_config(config_mtime, oldconfig={}):
@@ -147,8 +148,11 @@ def reload_layers(config):
     if config['real_video_device'].lower().endswith(extension):
         success, static_image = cap.read()
 
+frameNo = 0
+
 def mainloop():
-    global config, masks, layers, config_mtime
+    global config, masks, layers, config_mtime, frameNo
+    time_0_start = time.time()
 
     config, config_mtime_new = load_config(config_mtime, config)
     if config_mtime != config_mtime_new:
@@ -158,6 +162,8 @@ def mainloop():
         layers = reload_layers(config)
         config_mtime = config_mtime_new
 
+    time_1_config = time.time()
+
     if static_image is not None:
         success, frame = True, static_image
     else:
@@ -169,6 +175,8 @@ def mainloop():
     frame = frame[...,::-1]
     frame = frame.astype(np.float)
 
+    time_2_capture = time.time()
+
     input_height, input_width = frame.shape[:2]
 
     internal_resolution = config.get("internal_resolution", 0.5)
@@ -184,6 +192,8 @@ def mainloop():
 
     resized_height, resized_width = resized_frame.shape[:2]
 
+    time_3_resize = time.time()
+
     # Preprocessing
     if model_type == "mobilenet":
         resized_frame = np.divide(resized_frame, 127.5)
@@ -196,9 +206,12 @@ def mainloop():
 
     sample_image = resized_frame[tf.newaxis, ...]
+    time_4_preprocess = time.time()
+
     results = sess.run(output_tensor_names,
                        feed_dict={input_tensor: sample_image})
 
+    time_5_network = time.time()
 
     if model_type == "mobilenet":
         segment_logits = results[1]
@@ -208,30 +221,50 @@ def mainloop():
         segment_logits = results[6]
         part_heatmaps = results[5]
         heatmaps = results[2]
+
+    time_6_result1 = time.time()
 
     scaled_segment_scores = scale_and_crop_to_input_tensor_shape(
         segment_logits, input_height, input_width,
         padT, padB, padL, padR, True
     )
+
+    time_6_result2 = time.time()
 
-    scaled_part_heatmap_scores = scale_and_crop_to_input_tensor_shape(
-        part_heatmaps, input_height, input_width,
-        padT, padB, padL, padR, True
-    )
+    # not needed when doing only background replace
+    # scaled_part_heatmap_scores = scale_and_crop_to_input_tensor_shape(
+    #     part_heatmaps, input_height, input_width,
+    #     padT, padB, padL, padR, True
+    # )
 
-    scaled_heatmap_scores = scale_and_crop_to_input_tensor_shape(
-        heatmaps, input_height, input_width,
-        padT, padB, padL, padR, True
-    )
+    time_6_result3 = time.time()
+
+    # not needed when doing only background replace
+    # scaled_heatmap_scores = scale_and_crop_to_input_tensor_shape(
+    #     heatmaps, input_height, input_width,
+    #     padT, padB, padL, padR, True
+    # )
+
+    time_6_result4 = time.time()
 
     mask = to_mask_tensor(scaled_segment_scores,
                           config.get("segmentation_threshold", 0.75))
+
+    time_6_result5 = time.time()
+
     mask = np.reshape(mask, mask.shape[:2])
 
-    part_masks = to_mask_tensor(scaled_part_heatmap_scores, 0.999)
-    part_masks = np.array(part_masks)
-    heatmap_masks = to_mask_tensor(scaled_heatmap_scores, 0.99)
-    heatmap_masks = np.array(heatmap_masks)
+    time_6_result6 = time.time()
+
+    # not needed when doing only background replace
+    #part_masks = to_mask_tensor(scaled_part_heatmap_scores, 0.999)
+    #part_masks = np.array(part_masks)
+    #heatmap_masks = to_mask_tensor(scaled_heatmap_scores, 0.99)
+    #heatmap_masks = np.array(heatmap_masks)
+    part_masks = None
+    heatmap_masks = None
+
+    time_6_result = time.time()
 
     # Average over the last N masks to reduce flickering
     # (at the cost of seeing afterimages)
@@ -259,6 +292,8 @@ def mainloop():
 
     frame = np.append(frame, np.expand_dims(mask, axis=2), axis=2)
 
+    time_7_postprocess = time.time()
+
     input_frame = frame.copy()
     frame = np.zeros(input_frame.shape)
     for layer_type, layer_filters in layers:
@@ -287,6 +322,8 @@ def mainloop():
         else:
             frame[:,:,:3] = layer_frame[:,:,:3].copy()
 
+    time_8_compose = time.time()
+
     # Remove alpha channel
     frame = frame[:,:,:3]
 
@@ -308,6 +345,27 @@ def mainloop():
     frame = frame.astype(np.uint8)
     fakewebcam.schedule_frame(frame)
 
+    time_9_show = time.time()
+    # Output the theoretical FPS for each processing stage
+    if (frameNo % 100) == 0:
+        print("Config {}".format(1/(time_1_config - time_0_start)))
+        print("Capture {}".format(1/(time_2_capture - time_1_config)))
+        print("Resize {}".format(1/(time_3_resize - time_2_capture)))
+        print("Preprocess {}".format(1/(time_4_preprocess - time_3_resize)))
+        print("Network {}".format(1/(time_5_network - time_4_preprocess)))
+        print("Result {}".format(1/(time_6_result - time_5_network)))
+        print("Result 1 {}".format(1/(time_6_result1 - time_5_network)))
+        print("Result 2 {}".format(1/(time_6_result2 - time_6_result1)))
+        #print("Result 3 {}".format(1/(time_6_result3 - time_6_result2)))
+        #print("Result 4 {}".format(1/(time_6_result4 - time_6_result3)))
+        print("Result 5 {}".format(1/(time_6_result5 - time_6_result4)))
+        print("Result 6 {}".format(1/(time_6_result6 - time_6_result5)))
+        print("Postprocess {}".format(1/(time_7_postprocess - time_6_result)))
{}".format(1/(time_7_postprocess - time_6_result))) + print("Compose {}".format(1/(time_8_compose - time_7_postprocess))) + print("Show {}".format(1/(time_9_show - time_8_compose))) + print("Total {}".format(1/(time_9_show - time_0_start))) + print() + frameNo = frameNo + 1 if __name__ == "__main__": while True: