From 0e720f90423f5917edc7150b4cbdf139dbdbf96e Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 18 May 2025 21:45:24 -0400 Subject: [PATCH 1/9] Use the `list.append` function, which is safe for strings --- tubesync/sync/youtube.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tubesync/sync/youtube.py b/tubesync/sync/youtube.py index 9814db248..c795261a1 100644 --- a/tubesync/sync/youtube.py +++ b/tubesync/sync/youtube.py @@ -326,13 +326,10 @@ def download_media( final_path = Path(output_file).resolve(strict=False) expected_file = shell_quote(str(final_path)) cmds = pp_opts.exec_cmd.get('after_move', list()) - # It is important that we use a tuple for strings. - # Otherwise, list adds each character instead. - # That last comma is really necessary! - cmds += ( + cmds.append( f'test -f {expected_file} || ' 'mv -T -u -- %(filepath,_filename|)q ' - f'{expected_file}', + f'{expected_file}' ) # assignment is the quickest way to cover both 'get' cases pp_opts.exec_cmd['after_move'] = cmds @@ -387,7 +384,7 @@ def download_media( youtube_ea_dict = ytopts['extractor_args'].get('youtube', dict()) formats_list = youtube_ea_dict.get('formats', list()) if 'missing_pot' not in formats_list: - formats_list += ('missing_pot',) + formats_list.append('missing_pot') youtube_ea_dict.update({ 'formats': formats_list, }) From 971cea5c0eef12e4baa829985e43ec5b653866f9 Mon Sep 17 00:00:00 2001 From: tcely Date: Sun, 18 May 2025 22:26:06 -0400 Subject: [PATCH 2/9] Use the less ambiguous `list.extend` function --- tubesync/sync/models/source.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tubesync/sync/models/source.py b/tubesync/sync/models/source.py index 74f752780..063907f38 100644 --- a/tubesync/sync/models/source.py +++ b/tubesync/sync/models/source.py @@ -527,8 +527,8 @@ def get_index(self, type): days = timezone.timedelta(seconds=self.download_cap).days response = indexer(self.get_index_url(type=type), days=days) if not 
isinstance(response, dict): - return [] - entries = response.get('entries', []) + return list() + entries = response.get('entries', list()) return entries def index_media(self): @@ -537,11 +537,11 @@ def index_media(self): ''' entries = list() if self.index_videos: - entries += self.get_index('videos') + entries.extend(self.get_index('videos')) # Playlists do something different that I have yet to figure out if not self.is_playlist: if self.index_streams: - entries += self.get_index('streams') + entries.extend(self.get_index('streams')) if settings.MAX_ENTRIES_PROCESSING: entries = entries[:settings.MAX_ENTRIES_PROCESSING] From 8ba9ce40782e231f377148ae0504e319f77f9e9f Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 19 May 2025 00:10:13 -0400 Subject: [PATCH 3/9] Use a queue to avoid list memory performance --- tubesync/sync/models/source.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tubesync/sync/models/source.py b/tubesync/sync/models/source.py index 063907f38..ebe907673 100644 --- a/tubesync/sync/models/source.py +++ b/tubesync/sync/models/source.py @@ -1,6 +1,7 @@ import os import re import uuid +from collections import deque from pathlib import Path from django import db from django.conf import settings @@ -533,17 +534,20 @@ def get_index(self, type): def index_media(self): ''' - Index the media source returning a list of media metadata as dicts. + Index the media source returning a queue of media metadata as dicts. 
''' - entries = list() + entries = deque(list(), settings.get('MAX_ENTRIES_PROCESSING', 0) or None) if self.index_videos: entries.extend(self.get_index('videos')) + # Playlists do something different that I have yet to figure out if not self.is_playlist: if self.index_streams: - entries.extend(self.get_index('streams')) + streams = self.get_index('streams') + # do not allow streams to consume all of the queue + if entries.maxlen and entries.maxlen <= len(streams): + streams = streams[-1 * ( entries.maxlen // 2 ) :] + entries.extend(reversed(streams)) - if settings.MAX_ENTRIES_PROCESSING: - entries = entries[:settings.MAX_ENTRIES_PROCESSING] return entries From c7c37b12d89a1044665c213b114d55b05fd5e82e Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 19 May 2025 00:12:47 -0400 Subject: [PATCH 4/9] Use `queue` for better readability --- tubesync/sync/models/source.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tubesync/sync/models/source.py b/tubesync/sync/models/source.py index ebe907673..a739936ae 100644 --- a/tubesync/sync/models/source.py +++ b/tubesync/sync/models/source.py @@ -1,7 +1,7 @@ import os import re import uuid -from collections import deque +from collections import deque as queue from pathlib import Path from django import db from django.conf import settings @@ -536,7 +536,7 @@ def index_media(self): ''' Index the media source returning a queue of media metadata as dicts. 
''' - entries = deque(list(), settings.get('MAX_ENTRIES_PROCESSING', 0) or None) + entries = queue(list(), settings.get('MAX_ENTRIES_PROCESSING', 0) or None) if self.index_videos: entries.extend(self.get_index('videos')) From e84643b95e517a3d28f0020741763442fdcbb4f7 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 19 May 2025 01:44:49 -0400 Subject: [PATCH 5/9] Handle unbalanced channels better --- tubesync/sync/models/source.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tubesync/sync/models/source.py b/tubesync/sync/models/source.py index a739936ae..04fe84d48 100644 --- a/tubesync/sync/models/source.py +++ b/tubesync/sync/models/source.py @@ -544,10 +544,15 @@ def index_media(self): if not self.is_playlist: if self.index_streams: streams = self.get_index('streams') - # do not allow streams to consume all of the queue - if entries.maxlen and entries.maxlen <= len(streams): - streams = streams[-1 * ( entries.maxlen // 2 ) :] - entries.extend(reversed(streams)) + if entries.maxlen is None or 0 == len(entries): + entries.extend(streams) + else: + # share the queue between streams and videos + allowed_streams = max( + entries.maxlen // 2, + entries.maxlen - len(entries), + ) + entries.extend(streams[-1 * allowed_streams :]) return entries From 01fcf5ec66475238918eacfff7bc0b759d184882 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 19 May 2025 07:17:56 -0400 Subject: [PATCH 6/9] Reverse the lists when adding to the queue The default sorting appears to be newest first. We want any limits to drop the oldest entries from the queue instead of the newest. 
--- tubesync/sync/models/source.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tubesync/sync/models/source.py b/tubesync/sync/models/source.py index 04fe84d48..776ddfe86 100644 --- a/tubesync/sync/models/source.py +++ b/tubesync/sync/models/source.py @@ -538,21 +538,21 @@ def index_media(self): ''' entries = queue(list(), settings.get('MAX_ENTRIES_PROCESSING', 0) or None) if self.index_videos: - entries.extend(self.get_index('videos')) + entries.extend(reversed(self.get_index('videos'))) # Playlists do something different that I have yet to figure out if not self.is_playlist: if self.index_streams: streams = self.get_index('streams') if entries.maxlen is None or 0 == len(entries): - entries.extend(streams) + entries.extend(reversed(streams)) else: # share the queue between streams and videos allowed_streams = max( entries.maxlen // 2, entries.maxlen - len(entries), ) - entries.extend(streams[-1 * allowed_streams :]) + entries.extend(reversed(streams[-1 * allowed_streams :])) return entries From 4cbcd7688536f6a2e19bfa4d79fcd971e0a232d5 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 19 May 2025 07:36:37 -0400 Subject: [PATCH 7/9] Adjust for a queue instead of a list --- tubesync/sync/tasks.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index 0467a4fd1..b192ccfb9 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -316,7 +316,10 @@ def index_source_task(source_id): end=task.verbose_name.find('Index'), ) tvn_format = '{:,}' + f'/{num_videos:,}' - for vn, video in enumerate(videos, start=1): + vn = 0 + while len(videos) > 0: + vn += 1 + video = videos.popleft() # Create or update each video as a Media object key = video.get(source.key_field, None) if not key: From 8109d6a836c4b1c0ff16f01d874621105cc5c648 Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 19 May 2025 08:09:56 -0400 Subject: [PATCH 8/9] Use `getattr` not `dict.get` for `settings` --- 
tubesync/sync/models/source.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tubesync/sync/models/source.py b/tubesync/sync/models/source.py index 776ddfe86..824865215 100644 --- a/tubesync/sync/models/source.py +++ b/tubesync/sync/models/source.py @@ -536,7 +536,7 @@ def index_media(self): ''' Index the media source returning a queue of media metadata as dicts. ''' - entries = queue(list(), settings.get('MAX_ENTRIES_PROCESSING', 0) or None) + entries = queue(list(), getattr(settings, 'MAX_ENTRIES_PROCESSING', 0) or None) if self.index_videos: entries.extend(reversed(self.get_index('videos'))) From ed55710f26e08657f206aef48080eb2f28a5319b Mon Sep 17 00:00:00 2001 From: tcely Date: Mon, 19 May 2025 08:54:21 -0400 Subject: [PATCH 9/9] Create a set of keys for `cleanup_removed_media` --- tubesync/sync/tasks.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tubesync/sync/tasks.py b/tubesync/sync/tasks.py index b192ccfb9..6d0727c01 100644 --- a/tubesync/sync/tasks.py +++ b/tubesync/sync/tasks.py @@ -255,7 +255,7 @@ def cleanup_old_media(): schedule_media_servers_update() -def cleanup_removed_media(source, videos): +def cleanup_removed_media(source, video_keys): if not source.delete_removed_media: return log.info(f'Cleaning up media no longer in source: {source}') @@ -265,8 +265,7 @@ def cleanup_removed_media(source, videos): source=source, ) for media in qs_gen(mqs): - matching_source_item = [video['id'] for video in videos if video['id'] == media.key] - if not matching_source_item: + if media.key not in video_keys: log.info(f'{media.name} is no longer in source, removing') with atomic(): media.delete() @@ -317,6 +316,7 @@ def index_source_task(source_id): ) tvn_format = '{:,}' + f'/{num_videos:,}' vn = 0 + video_keys = set() while len(videos) > 0: vn += 1 video = videos.popleft() @@ -325,6 +325,7 @@ def index_source_task(source_id): if not key: # Video has no unique key (ID), it can't be indexed continue + 
video_keys.add(key) update_task_status(task, tvn_format.format(vn)) # media, new_media = Media.objects.get_or_create(key=key, source=source) try: @@ -379,7 +380,7 @@ def index_source_task(source_id): # Reset task.verbose_name to the saved value update_task_status(task, None) # Cleanup of media no longer available from the source - cleanup_removed_media(source, videos) + cleanup_removed_media(source, video_keys) videos = video = None